Exemple #1
0
def main():
    """Interactively exercise an HTTPSource against the TCEQ sample query URL.

    A console logger is registered, the source is configured with two
    parameter sets (one per file type, EVENT and RESULT) and started.
    While ``next()`` keeps returning 1, the URL is shown in a prompt that
    waits for Enter, the current message is printed and the source is
    committed.  After one more prompt, every entry of the source's final
    ``msg`` is printed.
    """
    loggers = []
    console_logger = {
        'name': 'httpsource testing',
        'level': 'DEBUG',
        'format': 'Simple',
        'handler': 'ConsoleAppender',
    }
    reticLog.addLogger(loggers, console_logger)

    # One query-parameter dictionary per file type; basin and year are fixed.
    source_config = {
        'name': "testing httpsource",
        'URL': "http://www.tceq.state.tx.us/cgi-bin/compliance/monops/crp/sampquery.pl",
        'pollPeriod': 12.45,
        'params': [
            {"filetype": file_type, "basinid": "0510", "year": "2005"}
            for file_type in (EVENT, RESULT)
        ],
    }

    source = HTTPSource.source(source_config, loggers)
    source.start()

    while source.next() == 1:
        # raw_input doubles as a "press Enter to continue" pause.
        raw_input("Content of this URL:  %s" % source.URL)
        print(source.msg)
        source.commit()

    raw_input("here,more message")
    for entry in source.msg:
        print(entry)
Exemple #2
0
 def __init__(self, adtName, srcConfName, pipeConfName, sinkConfName, logConfName=config[0], firstTimeRun=True):
     """Set up the adaptor's loggers, HTTP source and SQL sink.

     adtName       -- stored as ``self.adtName``.
     srcConfName   -- passed to ``get_conf_attr(..., "source")`` to obtain
                      the source settings.
     pipeConfName  -- accepted but not used in this constructor (no pipe
                      object is created for TCEQ).
     sinkConfName  -- passed to ``get_conf_attr(..., "sink")`` to obtain
                      the sink settings.
     logConfName   -- passed to ``get_conf_attr`` to obtain an iterable of
                      logger attribute dictionaries; defaults to
                      ``config[0]``.
     firstTimeRun  -- stored as ``self.firstTimeRun``.
     """
     self.adtName = adtName
     self.firstTimeRun = firstTimeRun

     # Register one logger per attribute dictionary in the log configuration.
     self.logList = []
     for logger_attrs in get_conf_attr(logConfName):
         reticLog.addLogger(self.logList, logger_attrs)

     # Source: the adaptor mirrors the source's interval on itself.
     source_settings = get_conf_attr(srcConfName, "source")
     self.adaptorSource = HTTPSource.source(source_settings, self.logList)
     self.interval = self.adaptorSource.interval

     # Sink: TCEQ needs no pipe stage, so the sink is built right away.
     sink_settings = get_conf_attr(sinkConfName, "sink")
     self.adaptorSink = SQLSink_forTCEQ.sink(sink_settings, self.logList)
def _insert_site_with_retries(session, site_record, retries=5):
    """Add *site_record* to *session* and flush it, retrying on DB errors.

    A fresh SiteID is requested from ``getMaxId`` before every attempt.
    When the flush raises ``exc.OperationalError`` or
    ``exc.InvalidRequestError`` the session is rolled back and the insert is
    tried again, for one initial attempt plus *retries* further attempts.

    Raises RuntimeError when every attempt has failed.
    """
    # NOTE(review): session.rollback() presumably also discards sites that
    # were flushed earlier in the same, not yet committed, unit of work --
    # confirm whether a savepoint is needed here.
    while retries >= 0:
        try:
            site_record.SiteID = getMaxId(session, "Sites")
            session.add(site_record)
            session.flush()
            return
        except (exc.OperationalError, exc.InvalidRequestError):
            print("DB constraint violation happen")
            session.rollback()
            retries -= 1
    # A string cannot be raised on Python 2.6+ (it turns into a TypeError),
    # so a real exception object carries the message.
    raise RuntimeError("Database Exception : all retries failed")


def main():
    """Import the TCEQ station list into the "Sites" table.

    The '|'-delimited stations file is fetched through an HTTPSource.  For
    every row after the first, the Sites table is queried by site code,
    latitude and longitude; rows without a match are inserted (with retries),
    rows with a match are skipped.  All inserts of one fetched message are
    committed together, then the source itself is committed.

    Raises RuntimeError when a site cannot be inserted after all retries.
    """
    session = initDB()

    # Logger setup: a single console logger.
    logAttDic = {'name': 'TCEQ sites and parameters importing for the first time',
                 'level': 'DEBUG',
                 'format': 'Simple',
                 'handler': 'ConsoleAppender'}
    logList = []
    reticLog.addLogger(logList, logAttDic)

    # The sites list is a text file served by the TCEQ FTP server.
    siteSrc_args = {
        'name': "TCEQ sites httpsource",
        'URL': "ftp://ftp.tceq.state.tx.us/pub/WaterResourceManagement/WaterQuality/DataCollection/CleanRivers/public/stations.txt",
    }
    sitesHTTPSource = HTTPSource.source(siteSrc_args, logList)
    sitesHTTPSource.start()

    while sitesHTTPSource.next() == 1:
        print("Content of this URL:  %s" % sitesHTTPSource.URL)
        sitesFile = StringIO(sitesHTTPSource.msg[0])
        sitesListReader = csv.reader(sitesFile, delimiter='|')
        for index, row in enumerate(sitesListReader):
            if index == 0:
                # The first line is skipped (presumably the column header).
                continue

            # Site names are capped at 255 characters.
            newRecordSiteName = row[SITENAME][:255]
            where = and_(Sites.SiteCode == unicode(row[SITECODE]),
                         Sites.Latitude == float(row[LATITUDE]),
                         Sites.Longitude == float(row[LONGITUDE]))
            try:
                # Only the lookup itself can signal "no such site".
                session.query(Sites).filter(where).one()
            except NoResultFound:
                # This site record does not exist yet, so insert it.
                newSiteRecord = Sites(row[SITECODE], newRecordSiteName,
                                      float(row[LATITUDE]), float(row[LONGITUDE]), row[COUNTY],
                                      ";".join(["HUC 8 = ", row[HUC], "EPA_Type1 = ", row[TYPE1], "EPA_Type2 = ", row[TYPE2]]))
                _insert_site_with_retries(session, newSiteRecord)
                print("inert new Site record with SiteCode ==> %s" % row[SITECODE])
            else:
                print("find record with SiteCode %s in database, skip it..." % row[SITECODE])

        # Unit-of-work pattern: commit once per fetched message.
        try:
            session.commit()
        except Exception:
            # Best effort: a failed commit is logged, not fatal.  Catching
            # Exception (not everything) lets Ctrl-C and SystemExit through.
            session.rollback()
            reticLog.logWarning(logList, "Commit Failed in SQLSink")
        else:
            reticLog.logInfo(logList, "( " + "TCEQ Sites" + " ) Update commited")
        sitesHTTPSource.commit()