Ejemplo n.º 1
0
    def processSites (self,recordList,sitesName):
        try:
            for record in recordList:
                sites_exist = None
                try:
                    where = and_(testTPWDmodel.Sites.Latitude==record['start_latitude_num'], 
                             testTPWDmodel.Sites.Longitude==record['start_longitude_num'])
                    sites_exist = self.session.query(testTPWDmodel.Sites).filter(where).one()
                except NoResultFound, e:
                    newSite = testTPWDmodel.Sites()
                    newSite.SiteID = self.getMaxId('Sites')
#                    mkey = md5.new()
#                    mkey.update(str(record['start_latitude_num'])+str(record['start_longitude_num']))
#                    mStr = mkey.hexdigest()
                    SiteCode_I = ''.join([major_area_str,record['major_area_code'],
                            minor_bay_str,record['minor_bay_code'],
                            station_str,record['station_code']])
                    newSite.SiteCode = unicode('_'.join([SiteCode_I,str(newSite.SiteID)]))
                    #here, changing to name-looking function later
                    #sites name is based on the csv file name processed 
                    newSite.SiteName = TPWDSitesDict[sitesName.split('_')[0]]
                    ###################################
                    newSite.Latitude,newSite.Longitude = float(record['start_latitude_num']),float(record['start_longitude_num'])
                    newSite.LatLongDatumID= 2
                    newSite.VerticalDatum = u'Unknown'
                    newSite.State = u'Texas'
                    self.session.add(newSite)
                    self.session.flush()
                    record['SiteID'] = newSite.SiteID
                else:
                    record['SiteID'] = sites_exist.SiteID
            self.session.commit()
            reticLog.logInfo(self.logList, "( " + self.name + " ) sites info processed on sink : " + self.name)
            return 0
Ejemplo n.º 2
0
 def __init__ (self, args, logList):
     """Initialise the file source from its configuration dictionary.

     Parameters:
         args    -- dictionary of settings. 'name', 'fileFilter',
                    'newExtension', 'filePath' and 'pollPeriod' are required;
                    'exitOnError' is optional and defaults to 'y'.
         logList -- logger collection handed to every reticLog call.

     A missing required key, or any other error raised while reading the
     settings, is logged and the process is terminated with sys.exit(1).
     """
     import os
     self.logList = logList
     # Placeholder so the error handlers below can always build their log
     # prefix, even when the 'name' key itself is the one that is missing.
     self.name = 'unknown'
     try:
         self.name = args['name']
         self.exitOnError = 'y'
         reticLog.logInfo(self.logList, '( ' + self.name + ' ) ' +  "Intitializing fileSource")
         self.fileFilter = args['fileFilter']
         self.newExtension = args['newExtension']
         self.msgList = []
         self.metadata = {}
         self.nbMsg = 0
         self.msg = ''
         self.msgName = ''
         # Stored with a trailing OS path separator ('\\' on nt, '/' on linux).
         self.filePath = args['filePath'] + os.sep
         # Used by the wait(interval) function, hence stored as a float.
         self.interval = float(args['pollPeriod'])
         if 'exitOnError' in args:
             self.exitOnError = args['exitOnError']
     except KeyError:
         reticLog.logError(self.logList, '( ' + self.name + ' ) ' + "Error on fileSource initialization")
         reticLog.logError(self.logList, '( ' + self.name + ' ) ' + "Parameter " + str(sys.exc_info()[1]) + " is missing on source definition" )
         sys.exit(1)
     except Exception:
         errorMessage = traceback.format_exception_only(sys.exc_info()[0],sys.exc_info()[1])[0]
         reticLog.logError(self.logList, '( ' + self.name + ' ) ' + "Unknown error on initialization on source")
         reticLog.logError(self.logList, '( ' + self.name + ' ) ' + errorMessage)
         sys.exit(1)
Ejemplo n.º 3
0
 def __init__ (self, args, logger):
     """Initialise the HTTP source from its configuration dictionary.

     Parameters:
         args   -- dictionary of settings. 'name' and 'URL' are required;
                   'pollPeriod', 'params' and 'exitOnError' are optional.
         logger -- logger collection handed to every reticLog call.

     A missing required key is logged and terminates the process with
     sys.exit(1). Any other exception is logged and then re-raised.
     """
     self.logList = logger
     # Placeholder so the error handlers below can always build their log
     # prefix, even when the 'name' key itself is the one that is missing.
     self.name = 'unknown'
     try:
         self.name = args['name']
         reticLog.logInfo(self.logList, '( ' + self.name + ' ) ' +  "Intitializing HTTPSource : " + self.name)
         self.URL = args['URL']
         self.exitOnError = 'n'
         self.msgList = []
         self.nbMsg = 0
         self.msg = []
         self.msgName = ''
         self.params = []
         self.metadata = {}
         # Used by the wait(interval) function, hence stored as a float.
         # NOTE(review): self.interval stays unset when 'pollPeriod' is absent;
         # confirm that no caller reads it in that case.
         if 'pollPeriod' in args:
             self.interval = float(args['pollPeriod'])
         if 'params' in args:
             self.params = args['params']
         if 'exitOnError' in args:
             self.exitOnError = args['exitOnError']
     except KeyError:
         reticLog.logError(self.logList, '( ' + self.name + ' ) ' + "Error on HTTPSource initialization")
         reticLog.logError(self.logList, '( ' + self.name + ' ) ' + "Parameter " + str(sys.exc_info()[1]) + " is missing on source definition" )
         sys.exit(1)
     except Exception:
         # format_exception_only returns a list of lines; take the first one
         # so it can be concatenated with the log prefix.
         errorMessage = traceback.format_exception_only(sys.exc_info()[0],sys.exc_info()[1])[0]
         reticLog.logError(self.logList, '( ' + self.name + ' ) ' + "Unknown error on HTTPSource initialization")
         reticLog.logError(self.logList, '( ' + self.name + ' ) ' + errorMessage)
         raise
Ejemplo n.º 4
0
Archivo: toXML.py Proyecto: twdb/txhis
 def process (self):
     """Render the buffered input message as an XML document.

     Reads self.InMsg line by line, skips lines starting with '#', hands
     every other line (without its trailing newline) to writeRecordAsXML,
     and wraps the output in the root tag. The finished document is stored
     in self.msg and appended to self.msgList.

     Returns 0 on success, or 1 after logging any exception.
     """
     try:
         self.tmpMsg = StringIO.StringIO()
         self.updateAttributesFromMetadata()
         if self.hasHeader == 'y':
             self.fieldNames = self.getHeader()
         self.tmpMsg.write('<?xml version=\"1.0\" encoding=\"' + self.encoding + '\"?>')
         self.tmpMsg.write('\n<' + self.rootTag + '>\n')
         # readline() returns an empty string at end of input, which ends the loop.
         for line in iter(self.InMsg.readline, ''):
             if line[0] == '#':
                 # Comment line in the input: not part of the data.
                 continue
             record = line[:-1] if line[-1] == '\n' else line
             self.writeRecordAsXML(record)
         self.tmpMsg.write('</' + self.rootTag + '>')
         self.tmpMsg.seek(0)
         self.msg = self.tmpMsg.read()
         self.msgList.append(self.msg)
         reticLog.logInfo(self.logList, '(' + self.name + ') ' + "Message process is finished in pipe")
     except:
         excType, excValue = sys.exc_info()[:2]
         errorMessage = traceback.format_exception_only(excType, excValue)[0]
         reticLog.logError(self.logList, '(' + self.name + ') ' + "Error during message processing in pipe")
         reticLog.logError(self.logList, '(' + self.name + ') ' + errorMessage)
         return 1
     else:
         return 0
Ejemplo n.º 5
0
def preprocess(filename,loglist):
    """Preprocess one TPWD CSV file and write a timestamped copy.

    Steps:
    1. keep only the columns listed in NEEDED_COL;
    2. drop duplicated rows (after the column extraction);
    3. sort the remaining rows with date_compare;
    4. convert the DATE column to a datetime string and the LONGITUDE and
       LATITUDE columns to decimal form via GotoDecimal;
    5. write the header plus the converted rows to a new CSV file under
       localPath whose name carries a minute-precision timestamp.

    Parameters:
        filename -- path of the CSV file to read; its first row is the header.
        loglist  -- logger collection handed to reticLog.

    Returns None. An empty input file is logged and skipped.
    """
    baseName = filename.split(os.sep)[-1]
    reticLog.logInfo(loglist, "start preprocessing %s......" % baseName)
    # Read everything up front so the input file is closed straight away.
    with open(filename) as inFile:
        extractedList = [[row[i] for i in NEEDED_COL] for row in csv.reader(inFile)]
    if not extractedList:
        reticLog.logWarning(loglist, "nothing to preprocess, %s is empty" % baseName)
        return
    # headerRow: the column names, left untouched.
    # recordRows: the actual data, converted below.
    headerRow,recordRows = extractedList[0],extractedList[1:]
    # Drop every repeated row, keeping the first occurrence. A set of row
    # tuples is used because itertools.groupby only merges *adjacent* equal
    # rows and the rows are not sorted at this point.
    seenRows = set()
    uniqueRows = []
    for row in recordRows:
        rowKey = tuple(row)
        if rowKey not in seenRows:
            seenRows.add(rowKey)
            uniqueRows.append(row)
    uniqueRows.sort(date_compare)
    # Column positions are loop-invariant, so look them up once.
    dateIdx = NEEDED_COL.index(DATE)
    longitudeIdx = NEEDED_COL.index(LONGITUDE)
    latitudeIdx = NEEDED_COL.index(LATITUDE)
    for row in uniqueRows:
        # The last four characters of the raw date value are cut off before parsing.
        row[dateIdx] = str(datetime.datetime.strptime(row[dateIdx][:-4],'%d%b%Y:%H:%M:%S'))
        row[longitudeIdx] = GotoDecimal(row[longitudeIdx],True)
        row[latitudeIdx] = GotoDecimal(row[latitudeIdx])
    # Put the header row back in front of the data.
    uniqueRows.insert(0,headerRow)
    # Output name: the second-to-last '_'-separated piece of the text before
    # the first '.', an underscore, then a minute-precision timestamp; the
    # original extension(s) are kept and any directory part is dropped.
    fileSplit = filename.split('.')
    timeStamp = datetime.datetime.now().strftime("%Y%m%d%H%M")
    fileSplit[0] = '_'.join([fileSplit[0].split('_')[-2],timeStamp])
    outFileName = ''.join([localPath,'.'.join(fileSplit).split(os.sep)[-1]])
    # Binary mode avoids the extra newline character the csv module would
    # otherwise produce; the with-block guarantees the data is flushed.
    with open(outFileName, "wb") as outFile:
        csv.writer(outFile).writerows(uniqueRows)
    reticLog.logInfo(loglist, "preprocessing %s is done." % baseName)
Ejemplo n.º 6
0
    def getMsg (self, messages):
        """Index the TCEQ event and result messages by RFA tag.

        messages[0] is the event file object and messages[1] the result file
        object; both are read as '|'-delimited CSV. self.basinSegmentInfo is
        rebuilt as a dictionary keyed by the RFA tag column, each value being
        a dictionary holding the event row under EVENT_IN_HASHTable and the
        list of matching result rows under RESULT_IN_HASHTable.

        Returns 0 on success, or 1 after printing and logging any exception.
        """
        try:
            reticLog.logInfo(self.logList, "( " + self.name + " ) Retrieving message for sink : " + self.name)
            self.basinSegmentInfo = {}
            eventRows = csv.reader(messages[0],delimiter="|")
            resultRows = csv.reader(messages[1],delimiter="|")
            for eventRow in eventRows:
                # Rows with three empty leading columns (seen for basin ID 6,
                # year 2010) are shifted back into place.
                if eventRow[0] == "" and eventRow[1] == "" and eventRow[2] == "":
                    eventRow = eventRow[3:]
                    print(eventRow)
                self.basinSegmentInfo[eventRow[RFATAG_COLUMN]] = {EVENT_IN_HASHTable: eventRow}
            for resultRow in resultRows:
                segment = self.basinSegmentInfo[resultRow[RFATAG_COLUMN]]
                # The first result row for a tag creates the list.
                segment.setdefault(RESULT_IN_HASHTable, []).append(resultRow)
            reticLog.logInfo(self.logList, "( " + self.name + " ) Message retrieved in sink : " + self.name)
        except Exception:
            import traceback
            traceback.print_exc(file=sys.stdout)
            excType, excValue = sys.exc_info()[:2]
            errorMessage = traceback.format_exception_only(excType, excValue)[0]
            reticLog.logError(self.logList, "( " + self.name + " ) Error during message retrieval in sink : " + self.name)
            reticLog.logError(self.logList, "( " + self.name + " ) " + errorMessage)
            return 1
        else:
            return 0
Ejemplo n.º 7
0
 def start (self):
     """Start the source adaptor by calling self.getMsg().

     Returns 0 on success. Any exception is logged and the process is
     terminated with sys.exit(1).
     """
     try:
         reticLog.logInfo(self.logList, '( ' + self.name + ' ) ' +  "Starting the source adaptor")
         self.getMsg()
     except:
         excType, excValue = sys.exc_info()[:2]
         errorMessage = traceback.format_exception_only(excType, excValue)[0]
         reticLog.logError(self.logList, '( ' + self.name + ' ) ' + "Unknown error on start of source")
         reticLog.logError(self.logList, '( ' + self.name + ' ) ' + errorMessage)
         sys.exit(1)
     else:
         return 0
Ejemplo n.º 8
0
 def prepareUpdateObject(self, recordList,processInfo):
     """Run processInfo over every record and collect everything it yields.

     Parameters:
         recordList  -- iterable of records.
         processInfo -- callable taking one record and returning an iterable
                        of objects.

     Returns one flat list of all produced objects, in record order.
     """
     reticLog.logInfo(self.logList, "( " + self.name + " ) prepareUpdateObject Start")
     # Flat list of objects, to be added to the session / real database.
     objects = []
     for record in recordList:
         objects.extend(processInfo(record))
     reticLog.logInfo(self.logList, "( " + self.name + " ) prepareUpdateObject End")
     print("%d data value objects generated" % len(objects))
     return objects
Ejemplo n.º 9
0
 def getMsg (self, message):
     """Store the incoming message on the sink as self.msg.

     Returns 0 on success, or 1 after logging any exception.
     """
     try:
         reticLog.logInfo(self.logList, "( " + self.name + " ) Retrieving message for sink : " + self.name)
         # Replaces whatever message was held before.
         self.msg = message
         reticLog.logInfo(self.logList, "( " + self.name + " ) Message retrieved in sink : " + self.name)
     except:
         excType, excValue = sys.exc_info()[:2]
         errorMessage = traceback.format_exception_only(excType, excValue)[0]
         reticLog.logError(self.logList, "( " + self.name + " ) Error during message retrieval in sink : " + self.name)
         reticLog.logError(self.logList, "( " + self.name + " ) " + errorMessage)
         return 1
     else:
         return 0
Ejemplo n.º 10
0
Archivo: toXML.py Proyecto: twdb/txhis
 def getMsg (self, message):
     """Load message into a fresh input buffer, rewound to its start.

     The buffer is stored as self.InMsg and self.msg is reset to ''.
     Returns 0 on success, or 1 after logging any exception.
     """
     try:
         reticLog.logInfo(self.logList, '(' + self.name + ') ' + "Getting message into pipe")
         inputBuffer = StringIO.StringIO()
         self.InMsg = inputBuffer
         self.msg = ''
         inputBuffer.write(message)
         # Rewind so later reads start at the first character.
         inputBuffer.seek(0)
     except:
         excType, excValue = sys.exc_info()[:2]
         errorMessage = traceback.format_exception_only(excType, excValue)[0]
         reticLog.logError(self.logList, '(' + self.name + ') ' + "Error during message retrieval in pipe" )
         reticLog.logError(self.logList, '(' + self.name + ') ' + errorMessage)
         return 1
     else:
         return 0
Ejemplo n.º 11
0
    def getRecordList(self):
        """Extract the fields and values to map to the SQL statement.

        self.msg is parsed as XML. The format is taken from
        self.metadata['msgFormat'] when that key is present ('xml' means XML,
        anything else is treated as flat); otherwise it is detected by
        attempting an XML parse. For every element of the tree, each child
        that itself has children becomes one record: a dictionary mapping the
        tag of every sub-child to its text.

        Returns the list of record dictionaries.
        Raises Exception when the message is a flat file, which is not
        supported.
        """
        reticLog.logInfo(self.logList, "( " + self.name + " ) getRecordList Start")
        msg = StringIO.StringIO()
        msg.write(self.msg)
        msg.seek(0)
        recordList = []
        msgFormat = ''
        xmlRoot = None
        # Look the format up under the literal 'msgFormat' key (not under the
        # still-empty msgFormat variable).
        if 'msgFormat' in self.metadata:
            if self.metadata['msgFormat'] == 'xml':
                xmlRoot = ElementTree(file=msg)
                msgFormat = 'xml'
            else:
                msgFormat = 'flat'

        if msgFormat == '':
            # No declared format: whatever parses as XML is XML.
            try:
                xmlRoot = ElementTree(file=msg)
                msgFormat = 'xml'
            except Exception:
                msgFormat = 'flat'

        reticLog.logDebug(self.logList, "Input format detected :  " + msgFormat)
        if msgFormat == 'xml':
            # Walk every element of the tree, at any depth.
            for element in xmlRoot.getiterator():
                for child in element:
                    # Only children that carry sub-elements describe a record.
                    if len(child):
                        prepRecord = {}
                        for subChild in child:
                            prepRecord[subChild.tag] = subChild.text
                        recordList.append(prepRecord)
            reticLog.logDebug(self.logList, "All records processed.")
        elif msgFormat == 'flat':
            raise Exception('Do not support flat file at this time')

        reticLog.logInfo(self.logList, "( " + self.name + " ) getRecordList End")
        return recordList
Ejemplo n.º 12
0
 def connect (self, args):
     """Establish a connection with the database.

     Builds the engine URL '<dbType>://<user>:<password>@<dsn>' from args,
     imports the driver module named by args['driverName'] and stores the
     resulting SQLAlchemy engine in self.engine.

     Raises RuntimeError, after logging the cause, when anything goes wrong.
     """
     try:
         reticLog.logInfo(self.logList, "Intitializing Database Connection : " + args['dsn'])
         # Construct the connection string from the parameters.
         engineStr = ''.join([args['dbType'], '://', args['user'], ':',
                              args['password'], '@', args['dsn']])
         # NOTE(review): __import__ returns the top-level package for a dotted
         # name; confirm driverName is never dotted.
         dbDriverMod = __import__(args['driverName'])
         self.engine = create_engine(engineStr, module=dbDriverMod)
         reticLog.logInfo(self.logList, "DataBase Connection established")
     except Exception:
         errorMessage = traceback.format_exception_only(sys.exc_info()[0],sys.exc_info()[1])[0]
         reticLog.logError(self.logList, "Database error : " + errorMessage )
         # String exceptions are rejected by the interpreter (TypeError), so
         # raise a real exception that carries the original cause.
         raise RuntimeError("Database Error : " + errorMessage.strip())
Ejemplo n.º 13
0
    def next(self):
        """Fetch the next message(s) over HTTP, or report that the source is dry.

        Returns 0 when self.nbMsg is 0, otherwise 1.

        With no entries in self.params the bare URL is requested once and the
        response body is appended to self.msg; a failure is logged and, when
        exitOnError is 'y', terminates the process with sys.exit(1).

        Otherwise self.msg is reset and one GET request is issued per entry
        of self.params. Each entry is merged into self.metadata and the whole
        of self.metadata is URL-encoded as the query string. A failed request
        is logged and retried until it succeeds, unless exitOnError is 'y'.
        """
        if self.nbMsg == 0:
            return 0
        reticLog.logInfo(self.logList, '( ' + self.name + ' ) ' +  "Messages Left on queue of adaptor : " + str(self.nbMsg))
        if len(self.params) == 0:
            # No params are provided: process the raw URL (no GET/POST request).
            reticLog.logInfo(self.logList, '( ' + self.name + ' ) ' +  "Retrieving message from : " + self.URL)
            try:
                connection = urllib2.urlopen(urllib2.Request(self.URL))
                try:
                    self.msg.append(connection.read())
                finally:
                    # Always release the underlying socket.
                    connection.close()
                reticLog.logInfo(self.logList, '( ' + self.name + ' ) ' +  "Message retrieved on adaptor: " + self.name)
            except Exception:
                errorMessage = traceback.format_exception_only(sys.exc_info()[0],sys.exc_info()[1])[0]
                reticLog.logError(self.logList, '( ' + self.name + ' ) ' + "Error on message retrieval on source : " + self.name)
                reticLog.logError(self.logList, '( ' + self.name + ' ) ' + errorMessage)
                if self.exitOnError.lower() == 'y':
                    sys.exit(1)
        else:
            self.msg = []
            # Params are provided: pass them through the GET method. There are
            # as many calls as there are param dictionaries.
            for param in self.params:
                for key in param.keys():
                    self.metadata[key] = param[key]
                paramLine = urlencode(self.metadata)
                fullUrl = self.URL + "?" + paramLine
                reticLog.logInfo(self.logList, '( ' + self.name + ' ) ' +  "Retrieving message from : " + fullUrl)
                # NOTE(review): the retry below has no delay and no upper
                # bound; confirm that hammering the server is acceptable.
                successful = False
                while not successful:
                    try:
                        connection = urllib2.urlopen(urllib2.Request(fullUrl))
                        try:
                            self.msg.append(connection.read())
                        finally:
                            connection.close()
                        reticLog.logInfo(self.logList, '( ' + self.name + ' ) ' +  "Message retrieved on adaptor: " + self.name)
                        successful = True
                    # Exception (not a bare except) so Ctrl-C and sys.exit
                    # can still break out of the retry loop.
                    except Exception:
                        errorMessage = traceback.format_exception_only(sys.exc_info()[0],sys.exc_info()[1])[0]
                        reticLog.logError(self.logList, '( ' + self.name + ' ) ' + "Error on message retrieval on source : " + self.name)
                        reticLog.logError(self.logList, '( ' + self.name + ' ) ' + errorMessage)
                        if self.exitOnError.lower() == 'y':
                            sys.exit(1)
        return 1
Ejemplo n.º 14
0
 def updateDB(self, objects):
     """Insert every data value object that is not yet in the database.

     Each object gets a fresh ValueID from self.getMaxId('DV'). It is skipped
     when a DataValues row with the same DataValue, LocalDateTime, SiteID and
     VariableID already exists; otherwise it is added to the session and
     flushed. A failed flush is rolled back and retried up to self.retries
     more times.

     Raises RuntimeError when all retries for an object have failed.
     """
     reticLog.logInfo(self.logList, "( " + self.name + " ) Starting update objects from sink : " + self.name)
     count = 0
     for ob in objects :
         ob.ValueID = self.getMaxId('DV')
         where = and_(testTPWDmodel.DataValues.DataValue==ob.DataValue,
                      testTPWDmodel.DataValues.LocalDateTime==ob.LocalDateTime,
                      testTPWDmodel.DataValues.SiteID==ob.SiteID,
                      testTPWDmodel.DataValues.VariableID==ob.VariableID)
         try:
             self.session.query(testTPWDmodel.DataValues).filter(where).one()
         except NoResultFound:
             # This record does not exist yet: fall through and insert it.
             pass
         else:
             # This record exists, skip it.
             print("record skipped")
             continue
         retries = self.retries
         while retries >= 0:
             try:
                 self.session.add(ob)
                 self.session.flush()
             # NOTE(review): unique-constraint violations may surface as
             # exc.IntegrityError rather than these two; confirm against the
             # driver in use.
             except (exc.OperationalError, exc.InvalidRequestError):
                 errorMessage = traceback.format_exception_only(sys.exc_info()[0],sys.exc_info()[1])[0]
                 print("DB constraint violation happen")
                 self.session.rollback()
                 # Count the failed attempt so that a persistent error ends
                 # the loop instead of retrying forever.
                 retries = retries - 1
                 if retries < 0:
                     raise RuntimeError("Database Exception : all retries failed")
                 reticLog.logWarning(self.logList, "Database Update failed : " + errorMessage)
             else:
                 print("recordNo == > %s generated" % ob.ValueID)
                 count += 1
                 break
Ejemplo n.º 15
0
 def commit(self):
     """Commit the current message: delete its file and drop it from the queue.

     Returns 0 when the file was removed and the queue advanced. Returns 1
     when no message is pending or when the removal failed (the failure is
     logged).
     """
     if self.nbMsg <= 0:
         return 1
     reticLog.logInfo(self.logList, '( ' + self.name + ' ) ' +  "Commiting msg " + self.msgList[0] + " on source : " + self.name)
     # Remembered up front so the error log can name the file.
     msgName = self.msgList[0]
     try:
         shutil.os.remove(self.filePath+self.msgName)
         self.nbMsg -= 1
         self.msgList = self.msgList[1:]
         self.msg = ''
     except:
         excType, excValue = sys.exc_info()[:2]
         errorMessage = traceback.format_exception_only(excType, excValue)[0]
         reticLog.logError(self.logList, '( ' + self.name + ' ) ' + "Error on commit phase on source - File : " + msgName)
         reticLog.logError(self.logList, '( ' + self.name + ' ) ' + errorMessage)
         return 1
     else:
         return 0
Ejemplo n.º 16
0
def preprocess_batch(directoryName,loglist):
    """Preprocess every CSV file of the most recent request folder.

    Looks under directoryName for folders named 'request_YYYYMMDD...', picks
    the one with the latest date and calls preprocess() on each '*.csv' file
    inside it.

    Parameters:
        directoryName -- directory that contains the request_* folders.
        loglist       -- logger collection handed to reticLog.

    Folders whose name does not carry a valid date are logged and ignored.
    When no usable folder exists a warning is logged and nothing is processed.
    """
    import datetime
    import glob
    import os
    reticLog.logInfo(loglist,"Starting TPWD preprocessing procedure")
    prefix = 'request_'
    # Find the folder with the largest timestamp.
    max_datetime = None
    folderToProcess = None
    for name in glob.glob('%s/request_*' % directoryName):
        # Parse the date from the folder's own name only, so that a parent
        # directory containing the word 'request' cannot confuse the parsing.
        stamp = os.path.basename(name)[len(prefix):]
        try:
            tempDateTime = datetime.datetime(int(stamp[0:4]),
                                             int(stamp[4:6]),
                                             int(stamp[6:8]))
        except ValueError:
            reticLog.logWarning(loglist, 'ignoring folder without a valid date: %s' % name)
            continue
        if (not max_datetime) or (tempDateTime > max_datetime):
            max_datetime = tempDateTime
            folderToProcess = name
    if folderToProcess is None:
        reticLog.logWarning(loglist, 'no request folder found under %s' % directoryName)
        return
    # At this point folderToProcess is the folder with the latest timestamp.
    reticLog.logInfo(loglist, 'preprocess folder: %s, with requested time %s' % (folderToProcess,str(max_datetime)) )
    for name in glob.glob('%s/*.csv' % folderToProcess):
        preprocess(name,loglist)
Ejemplo n.º 17
0
 def next(self):
     """Load the next queued file into self.msg, or report the source is dry.

     The first name in self.msgList becomes self.msgName. The text before its
     first '.' is appended to the list self.metadata['filename'], and the
     text between the first and second '.' is stored in
     self.metadata['extension'] ('' when the name has no '.' after its first
     character). The file is then read from self.filePath into self.msg, in
     text or binary mode depending on reticUtils.istext.

     Returns 0 when no message is queued and 1 when a file was read. On an
     error the failure is logged and the result is 0 when exitOnError is 'y',
     otherwise 1.
     """
     if self.nbMsg == 0:
         reticLog.logInfo(self.logList, '( ' + self.name + ' ) ' +  "Sources dry on source ")
         return 0
     try:
         reticLog.logInfo(self.logList, '( ' + self.name + ' ) ' +  "Messages Left on queue of adaptor : " + str(self.nbMsg))
         reticLog.logInfo(self.logList, '( ' + self.name + ' ) ' +  "Retrieving file : " + self.msgList[0])
         self.msgName = self.msgList[0]
         if self.msgName.find('.') > 0:
             nameParts = self.msgName.split('.')
             baseName, extension = nameParts[0], nameParts[1]
         else:
             baseName, extension = self.msgName, ''
         # 'filename' is always a list, so appending keeps working whatever
         # mix of dotted and undotted file names comes through.
         if 'filename' not in self.metadata:
             self.metadata['filename'] = []
         self.metadata['filename'].append(baseName)
         self.metadata['extension'] = extension
         # The file is read here: probe it first to choose the open mode.
         fullPath = os.path.join(self.filePath,self.msgName)
         with open(fullPath) as probe:
             openMode = 'r' if reticUtils.istext(probe) else 'rb'
         with open(fullPath,openMode) as fp:
             self.msg = fp.read()
         return 1
     except Exception:
         errorMessage = traceback.format_exception_only(sys.exc_info()[0],sys.exc_info()[1])[0]
         reticLog.logError(self.logList, '( ' + self.name + ' ) ' + "Error on message retrieval on source : " + self.name)
         reticLog.logError(self.logList, '( ' + self.name + ' ) ' + errorMessage)
         if self.exitOnError.lower() == 'y':
             return 0
         else:
             return 1
Ejemplo n.º 18
0
 def start (self):
     """Log the start of the HTTP source adaptor, then call self.getMsg().

     Always returns 0; an exception raised by getMsg() propagates.
     """
     banner = '( ' + self.name + ' ) ' +  "Starting the http source adaptor"
     reticLog.logInfo(self.logList, banner)
     self.getMsg()
     return 0
Ejemplo n.º 19
0
 def updateDB(self,methodLookUpfile):
     """Insert one DataValues row per result row held in self.basinSegmentInfo.

     For every segment entry that has result rows, a candidate record is
     built from the result row and the segment's event row. The record is
     inserted when no identical DataValues row exists and skipped otherwise.

     methodLookUpfile is the path of a dbm file (opened with anydbm) that
     maps a variable code to a method description.
     """
     reticLog.logInfo(self.logList, "( " + self.name + " ) Starting update objects from sink : " + self.name)
     # Number of records successfully flushed to the session.
     count = 0
     for key in self.basinSegmentInfo.keys():
         #need this because for some years (eg: 1973), there is no data collected in result file
         # NOTE(review): VALUE is used here as a minimum number of result rows
         # but below as a column index into a result row - confirm intent.
         if self.basinSegmentInfo[key].has_key(RESULT_IN_HASHTable) and len(self.basinSegmentInfo[key][RESULT_IN_HASHTable]) >= VALUE : 
             for resultRow in self.basinSegmentInfo[key][RESULT_IN_HASHTable]:
                 retries = self.retries
                 execOk = 0
                 try:
                     import datetime
                     ValueID = self.getMaxId('DV')
                     DataValue = float(resultRow[VALUE])
                     # Event date and time are joined and parsed as "month/day/year hour:minute".
                     LocalDateTime = datetime.datetime.strptime(" ".join([self.basinSegmentInfo[key][EVENT_IN_HASHTable][DATE],
                                                                          self.basinSegmentInfo[key][EVENT_IN_HASHTable][TIME]]), 
                                                                          "%m/%d/%Y %H:%M") 
                     SiteID = self.lookUpSite(self.basinSegmentInfo[key][EVENT_IN_HASHTable][SITECODE])
                     VariableID = self.lookUpVariableID(resultRow[VARIABLECODE])
                     # An empty offset depth is stored as -9999.
                     if not self.basinSegmentInfo[key][EVENT_IN_HASHTable][OFFSETDEPTH] == "":
                         OffsetValue = float(self.basinSegmentInfo[key][EVENT_IN_HASHTable][OFFSETDEPTH])
                     else:
                         OffsetValue = float(-9999)
                     CensorCode = u'nc'
                     #find method id
                     # NOTE(review): the dbm file is reopened for every result
                     # row and never closed in this block.
                     import anydbm
                     methodDBMfile = anydbm.open(methodLookUpfile, 'r')
                     MethodDescription = methodDBMfile[resultRow[VARIABLECODE]] 
                     MethodID = self.lookUpMethodID(MethodDescription)
                     #for production databse:
                     #SourceID = 1 
                     # Existence check: one() raises NoResultFound when no
                     # identical row is stored yet.
                     where = and_(DataValues.DataValue == DataValue, 
                                  DataValues.LocalDateTime== LocalDateTime,
                                  DataValues.SiteID== SiteID,
                                  DataValues.VariableID == VariableID,
                                  DataValues.OffsetValue == OffsetValue,
                                  DataValues.MethodID == MethodID)
                     valueExist = self.session.query(DataValues).filter(where).one() 
                 #this DataValue record does not exist,insert it
                 except NoResultFound, e:
                     # NOTE(review): retries is never decremented in this loop,
                     # so it only ends once a flush succeeds; the
                     # "all retries failed" branch below cannot be reached.
                     while retries >= 0 and execOk == 0:
                         try:
                             ############
                             newDataValueRecord = DataValues(ValueID,DataValue,LocalDateTime,SiteID,VariableID,OffsetValue,MethodID)
                             self.session.add(newDataValueRecord)
                             self.session.flush()
                             execOk = 1
                         #this is the handler for some violation of unique constriant on keys
                         except exc.OperationalError:
                             print "DB constraint violation happen"
                             self.session.rollback()
                             #execOk = 0
                            # retries = retries - 1
                         #this is the handler or invalid request error
                         except exc.InvalidRequestError:
                             print "DB constraint violation happen"
                             self.session.rollback()
                         #raise
                         if execOk == 0 and retries < 0:
                             # NOTE(review): this raises a plain string, not an
                             # exception instance.
                             raise "Database Exception : all retries failed"
                         elif execOk == 1:
                             print "recordNo == >", newDataValueRecord.ValueID, "generated"
                             count += 1
                         else:
                             # A flush failed and was rolled back: log it, then loop again.
                             errorMessage = traceback.format_exception_only(sys.exc_info()[0],sys.exc_info()[1])[0]
                             reticLog.logWarning(self.logList, "Database Update failed : " + errorMessage)
                 # A site/variable/method lookup failed: log it and move on to the next row.
                 # NOTE(review): presumably raised by the lookUp* helpers - confirm.
                 except TCEQRecordNotFoundError, e:
                      errorMessage = str(e)
                      reticLog.logWarning(self.logList, "Database Update failed : " + errorMessage)                
                 # Any other error: print the traceback and the offending row, then re-raise.
                 except Exception, e:
                     traceback.print_exc(file=sys.stdout)
                     print resultRow
                     raise 
                 # The existence query found a row: this record exists, skip it.
                 else:
                     print "record found, need to skip this record (may be wrong behavior....)"
Ejemplo n.º 20
0
                             count += 1
                         else:
                             errorMessage = traceback.format_exception_only(sys.exc_info()[0],sys.exc_info()[1])[0]
                             reticLog.logWarning(self.logList, "Database Update failed : " + errorMessage)
                 except TCEQRecordNotFoundError, e:
                      errorMessage = str(e)
                      reticLog.logWarning(self.logList, "Database Update failed : " + errorMessage)                
                 #this record exists, skip it
                 except Exception, e:
                     traceback.print_exc(file=sys.stdout)
                     print resultRow
                     raise 
                 else:
                     print "record found, need to skip this record (may be wrong behavior....)"
         #end big for loop
     reticLog.logInfo(self.logList, "( " + self.name + " ) Number DB record  (%d) added : " % count + self.name) 
     reticLog.logInfo(self.logList, "( " + self.name + " ) Update of Databases ended in sink : " + self.name)
     #unit of work pattern, only commit one time
     try:                                            
         self.session.commit()
         #self.session.close()
         reticLog.logInfo(self.logList, "( " + self.name + " ) Update commited")                    
     except:
         self.session.rollback()
         #self.session.close()
         reticLog.logWarning(self.logList, "Commit Failed in SQLSink")
         
         
 #auto generate new record for new DataValues table record    
 def getMaxId(self,tabFlag):
     maxid = 0
Ejemplo n.º 21
0
def main():
    """Import the TCEQ station list into the ``Sites`` table.

    Sets up a console logger, fetches the pipe-delimited station list through
    an ``HTTPSource`` and walks every data row (the first row is skipped as a
    header). A row whose site code, latitude and longitude already match a
    stored ``Sites`` record is skipped; any other row is inserted with a
    freshly generated ``SiteID``. All inserts belonging to one fetched file
    are committed together after the file has been processed.

    Raises:
        RuntimeError: if a new site still cannot be flushed after the first
            attempt plus five retries.
    """
    session = initDB()
    # Logger setup. Here, simply set a console logger.
    logAttDic = {'name': 'TCEQ sites and parameters importing for the first time',
                 'level': 'DEBUG',
                 'format': 'Simple',
                 'handler': 'ConsoleAppender'}
    logList = []
    reticLog.addLogger(logList, logAttDic)
    # Get the sites list (a text file) from an HTTPSource and insert all the
    # sites into the "Sites" table of the ODM database.
    siteSrc_args = {
        'name': "TCEQ sites httpsource",
        'URL': "ftp://ftp.tceq.state.tx.us/pub/WaterResourceManagement/WaterQuality/DataCollection/CleanRivers/public/stations.txt",
    }
    sitesHTTPSource = HTTPSource.source(siteSrc_args, logList)
    sitesHTTPSource.start()
    while sitesHTTPSource.next() == 1:
        print("Content of this URL:  %s" % sitesHTTPSource.URL)
        sitesFile = StringIO(sitesHTTPSource.msg[0])
        sitesListReader = csv.reader(sitesFile, delimiter='|')
        for index, row in enumerate(sitesListReader):
            # The first row is skipped (treated as the column header).
            if index == 0:
                continue
            # Site names longer than 255 characters are truncated; slicing
            # leaves shorter names untouched.
            newRecordSiteName = row[SITENAME][:255]
            latitude = float(row[LATITUDE])
            longitude = float(row[LONGITUDE])
            where = and_(Sites.SiteCode == unicode(row[SITECODE]),
                         Sites.Latitude == latitude,
                         Sites.Longitude == longitude)
            # Only the lookup itself is guarded, so an unrelated error is
            # never mistaken for "site not stored yet".
            try:
                session.query(Sites).filter(where).one()
            except NoResultFound:
                # This site record does not exist yet: fall through and insert it.
                pass
            else:
                print("find record with SiteCode %s in database, skip it..." % row[SITECODE])
                continue

            newSiteRecord = Sites(row[SITECODE], newRecordSiteName,
                                  latitude, longitude, row[COUNTY],
                                  ";".join(["HUC 8 = ", row[HUC],
                                            "EPA_Type1 = ", row[TYPE1],
                                            "EPA_Type2 = ", row[TYPE2]]))
            # Robustness: the insert is attempted once and then retried up to
            # five more times, asking for a new id on every attempt.
            # NOTE(review): session.rollback() presumably also discards sites
            # flushed earlier in this not-yet-committed batch - confirm
            # whether a savepoint is needed here.
            retries = 5
            inserted = False
            while retries >= 0 and not inserted:
                try:
                    newSiteRecord.SiteID = getMaxId(session, "Sites")
                    session.add(newSiteRecord)
                    session.flush()
                    inserted = True
                # Both error types are handled identically: undo and retry.
                except (exc.OperationalError, exc.InvalidRequestError):
                    print("DB constraint violation happen")
                    session.rollback()
                    retries -= 1
            if not inserted:
                # String exceptions are not supported by Python 2.6+ (raising
                # one yields a TypeError), so raise a real exception object.
                raise RuntimeError("Database Exception : all retries failed")
            print("inert new Site record with SiteCode ==> %s" % row[SITECODE])
        # Unit of work pattern: commit only once per fetched file.
        try:
            session.commit()
            reticLog.logInfo(logList, "( " + "TCEQ Sites" + " ) Update commited")
        except Exception:
            # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
            session.rollback()
            reticLog.logWarning(logList, "Commit Failed in SQLSink")
        sitesHTTPSource.commit()