def main():
    """Manual smoke test for ``HTTPSource`` against the TCEQ sample query URL.

    Registers a console logger, builds an ``HTTPSource`` with two parameter
    sets (``EVENT`` and ``RESULT`` file types), then echoes whatever the
    source delivers. ``raw_input`` is used to pause between steps so the
    output can be inspected interactively.
    """
    # session = initDB()
    console_logger_conf = {
        # get self name
        'name': 'httpsource testing',
        'level': 'DEBUG',
        'format': 'Simple',
        'handler': 'ConsoleAppender',
    }
    logList = []
    reticLog.addLogger(logList, console_logger_conf)

    # One request is described per entry in 'params'.
    source_conf = {
        'name': "testing httpsource",
        'URL': "http://www.tceq.state.tx.us/cgi-bin/compliance/monops/crp/sampquery.pl",
        'pollPeriod': 12.45,
        'params': [
            {"filetype": EVENT, "basinid": "0510", "year": "2005"},
            {"filetype": RESULT, "basinid": "0510", "year": "2005"},
        ],
    }

    sampleSource = HTTPSource.source(source_conf, logList)
    sampleSource.start()

    # echoResult = []
    while sampleSource.next() == 1:
        # Pause before dumping the message that was just fetched.
        raw_input("Content of this URL: %s" % sampleSource.URL)
        print(sampleSource.msg)
        # add stringIO here
        sampleSource.commit()
    # sampleSource.commit()

    raw_input("here,more message")
    for message in sampleSource.msg:
        print(message)
def __init__(self, adtName, srcConfName, pipeConfName, sinkConfName, logConfName=config[0], firstTimeRun=True):
    """Assemble the adaptor: its loggers, its HTTP source and its SQL sink.

    adtName       -- stored as ``self.adtName``.
    srcConfName   -- configuration name passed to ``get_conf_attr`` with
                     the "source" section to build the ``HTTPSource``.
    pipeConfName  -- accepted but not used in this constructor (no pipe
                     object for TCEQ).
    sinkConfName  -- configuration name passed to ``get_conf_attr`` with
                     the "sink" section to build the ``SQLSink_forTCEQ``.
    logConfName   -- configuration name passed to ``get_conf_attr`` to
                     obtain the logger attribute dictionaries; defaults
                     to ``config[0]``.
    firstTimeRun  -- stored as ``self.firstTimeRun``.
    """
    self.adtName = adtName
    self.firstTimeRun = firstTimeRun

    # assert not logConfName
    # Register one logger per attribute dictionary found in the log config.
    self.logList = []
    logger_specs = get_conf_attr(logConfName)
    for logger_spec in logger_specs:
        reticLog.addLogger(self.logList, logger_spec)

    # Source object; its polling interval is mirrored on the adaptor.
    source_conf = get_conf_attr(srcConfName, "source")
    self.adaptorSource = HTTPSource.source(source_conf, self.logList)
    self.interval = self.adaptorSource.interval

    # No pipe object for TCEQ.

    # Sink object.
    sink_conf = get_conf_attr(sinkConfName, "sink")
    self.adaptorSink = SQLSink_forTCEQ.sink(sink_conf, self.logList)
def _insert_site_with_retries(session, site_record, max_retries=5):
    """Flush ``site_record`` through ``session``, retrying on database errors.

    Makes one initial attempt plus up to ``max_retries`` retries. Returns
    True as soon as a flush succeeds, False when every attempt failed.
    """
    for _attempt in range(max_retries + 1):
        try:
            # The id is re-fetched on every attempt (presumably so an id
            # taken by a concurrent insert is not reused -- confirm).
            site_record.SiteID = getMaxId(session, "Sites")
            session.add(site_record)
            session.flush()
            return True
        except (exc.OperationalError, exc.InvalidRequestError):
            # Key/constraint violations and invalid-request errors are
            # handled the same way: report, roll back, try again.
            print("DB constraint violation happen")
            # NOTE(review): this rolls back the whole open transaction, so
            # sites flushed earlier in this run but not yet committed are
            # presumably discarded as well -- confirm whether a savepoint
            # is wanted here.
            session.rollback()
    return False


def main():
    """Import the TCEQ station list into the ``Sites`` table.

    Fetches the pipe-delimited stations file through an ``HTTPSource``,
    skips the first row and every site already present (matched on site
    code, latitude and longitude), inserts the rest, and commits once per
    fetched message.

    Raises:
        RuntimeError: when a site could not be inserted after all retries.
    """
    session = initDB()

    # Logger setup: a single console logger.
    logAttDic = {
        'name': 'TCEQ sites and parameters importing for the first time',
        'level': 'DEBUG',
        'format': 'Simple',
        'handler': 'ConsoleAppender',
    }
    logList = []
    reticLog.addLogger(logList, logAttDic)

    # Get the sites list (a text file) from an HTTPSource and insert all
    # the sites into the "Sites" table of the ODM database.
    siteSrc_args = {
        'name': "TCEQ sites httpsource",
        'URL': "ftp://ftp.tceq.state.tx.us/pub/WaterResourceManagement/WaterQuality/DataCollection/CleanRivers/public/stations.txt",
    }
    sitesHTTPSource = HTTPSource.source(siteSrc_args, logList)
    sitesHTTPSource.start()

    while sitesHTTPSource.next() == 1:
        print("Content of this URL: %s" % sitesHTTPSource.URL)
        sitesListReader = csv.reader(StringIO(sitesHTTPSource.msg[0]),
                                     delimiter='|')

        for index, row in enumerate(sitesListReader):
            if index == 0:
                # First row is skipped (presumably the column header).
                continue

            # Slicing already leaves names of 255 characters or fewer intact.
            newRecordSiteName = row[SITENAME][:255]
            latitude = float(row[LATITUDE])
            longitude = float(row[LONGITUDE])

            where = and_(Sites.SiteCode == unicode(row[SITECODE]),
                         Sites.Latitude == latitude,
                         Sites.Longitude == longitude)
            try:
                session.query(Sites).filter(where).one()
            except NoResultFound:
                # This site record does not exist yet, so insert it below.
                pass
            else:
                print("find record with SiteCode %s in database, skip it..."
                      % row[SITECODE])
                continue

            # NOTE(review): the join puts ';' between each label and its
            # value as well as between pairs; kept as-is because it is
            # stored data -- confirm the intended format.
            comments = ";".join(["HUC 8 = ", row[HUC],
                                 "EPA_Type1 = ", row[TYPE1],
                                 "EPA_Type2 = ", row[TYPE2]])
            newSiteRecord = Sites(row[SITECODE], newRecordSiteName,
                                  latitude, longitude, row[COUNTY], comments)

            if not _insert_site_with_retries(session, newSiteRecord):
                # A real exception object: raising a bare string is itself
                # a TypeError on Python 2.6+.
                raise RuntimeError("Database Exception : all retries failed")
            print("inert new Site record with SiteCode ==> %s" % row[SITECODE])

        # Unit-of-work pattern: commit only once for the whole message.
        try:
            session.commit()
            reticLog.logInfo(logList, "( " + "TCEQ Sites" + " ) Update commited")
        except Exception:
            # Best effort: roll back and log, but let KeyboardInterrupt and
            # SystemExit propagate instead of being swallowed.
            session.rollback()
            reticLog.logWarning(logList, "Commit Failed in SQLSink")
        sitesHTTPSource.commit()