def generateTaskList():
    """Query the URLs that still need a location and have a known source.

    Selects UID and URL from ``URLs`` rows whose Status matches ``NeedsLoc``
    and whose USource appears in ``sourceLocales.siteSource``, newest UDate
    first, capped at 60 rows.

    Returns:
        The value returned by ``sigTools.dbExecution`` for the query,
        unmodified.
    """
    # A trailing comma is required for a one-element tuple; ("NeedsLoc")
    # is only a parenthesised string, not a parameter sequence.
    params = ("NeedsLoc",)
    query = (
        "SELECT UID, URL from URLs WHERE Status like %s and USource in "
        "(select siteSource from sourceLocales) ORDER BY UDate DESC LIMIT 60"
    )
    return sigTools.dbExecution(query, params)
def main():
    """Date each pending URL from the ``date`` field of its fetched content.

    For every row returned by ``buildList()`` (UID at index 0, URL at
    index 1): fetch the page with ``grabContent``, parse the response as
    JSON, parse its ``date`` value and store it through ``updateEntry``.
    If any of those steps fails, the row's Status is set to
    ``NeedHumanWork`` and the loop moves on to the next row.
    """
    for item in buildList():
        UID = item[0]
        pageURL = item[1]
        print(UID)
        try:
            readResponse = grabContent(pageURL)
            data = json.loads(readResponse)
            finalDate = datetime.strptime(
                data['date'], "%a, %d %b %Y %H:%M:%S %Z")
            updateResult = updateEntry(finalDate, UID)
            print(" " + str(finalDate))
            print(" " + str(updateResult))
        except Exception:
            # Best-effort: any ordinary failure flags the row for manual
            # review.  Catching Exception (not a bare except) lets
            # KeyboardInterrupt / SystemExit still stop the run.
            Data = ("NeedHumanWork", UID)
            CMD = "UPDATE URLs SET Status = %s WHERE UID = %s"
            errorResult = sigTools.dbExecution(CMD, Data)
            print(" Error: " + str(errorResult))
def buildList():
    """Return up to 8 URL rows that have a zero date and no Status.

    The query matches ``DateDate like '0000-00-00%'`` and ``Status is``
    the bound ``None`` parameter.

    Returns:
        Element 2 of the ``sigTools.dbExecution`` result (the part callers
        iterate as rows of ``(UID, URL)``).
    """
    query = "select UID, URL from URLs where DateDate like %s and Status is %s limit 8"
    params = ("0000-00-00%", None)
    outcome = sigTools.dbExecution(query, params)
    return outcome[2]
def main():
    """Fill in DateDate for task-list URLs whose address contains a date.

    Walks element 2 of ``generateTaskList()``'s result (rows with UID at
    index 0 and URL at index 1).  For each URL in which ``findDateInURL``
    finds at least one date, the first date found is written to the row
    together with the dater tag ``Bot-URL`` and the current timestamp.
    """
    for item in generateTaskList()[2]:
        UID = item[0]
        # Decode HTML-escaped ampersands before scanning the URL; replacing
        # "&" with "&" would be a no-op.
        pageURL = item[1].replace("&amp;", "&")
        cleanDateList = findDateInURL(pageURL)
        if not cleanDateList:
            continue

        print(UID + " :: " + pageURL)
        print(" " + str(cleanDateList))
        finalDate = cleanDateList[0]
        try:
            currentDate = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            Data = (finalDate, 'Bot-URL', currentDate, UID)
            CMD = "UPDATE URLs SET DateDate = %s, DateDater = %s, DateDated = %s WHERE UID = %s"
            updateResult = sigTools.dbExecution(CMD, Data)
            print(" " + str(updateResult))
        except Exception:
            # Best-effort per row: report and continue, but do not swallow
            # KeyboardInterrupt / SystemExit as a bare except would.
            print("Bad Entry -- " + UID)
def main():
    """Fill in DateDate for task-list URLs whose address contains a date.

    Iterates element 2 of the ``generateTaskList()`` result; each row
    carries the UID at index 0 and the URL at index 1.  When
    ``findDateInURL`` returns a non-empty list for the URL, its first entry
    is stored as the row's DateDate, with DateDater ``Bot-URL`` and
    DateDated set to the current local time.
    """
    taskRows = generateTaskList()[2]
    for item in taskRows:
        UID = item[0]
        # The stored URL may carry HTML-escaped ampersands; decode them.
        # (A "&" -> "&" replacement would leave the URL unchanged.)
        pageURL = item[1].replace("&amp;", "&")
        cleanDateList = findDateInURL(pageURL)
        if not cleanDateList:
            # No date recognised in this URL: nothing to update.
            continue

        print(UID + " :: " + pageURL)
        print(" " + str(cleanDateList))
        finalDate = cleanDateList[0]
        try:
            now = datetime.datetime.now()
            currentDate = now.strftime("%Y-%m-%d %H:%M:%S")
            Data = (finalDate, 'Bot-URL', currentDate, UID)
            CMD = "UPDATE URLs SET DateDate = %s, DateDater = %s, DateDated = %s WHERE UID = %s"
            updateResult = sigTools.dbExecution(CMD, Data)
            print(" " + str(updateResult))
        except Exception:
            # Keep going on ordinary errors; unlike a bare except this
            # still lets Ctrl-C / SystemExit terminate the script.
            print("Bad Entry -- " + UID)
def updateEntry(finalDate, UID):
    """Store a resolved date on one URLs row, attributed to 'diffBot'.

    Args:
        finalDate: value written to the DateDate column.
        UID: identifier of the row to update.

    Returns:
        The ``sigTools.dbExecution`` result for the UPDATE.
    """
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    statement = "UPDATE URLs SET DateDate = %s, DateDater = %s, DateDated = %s WHERE UID = %s"
    values = (finalDate, 'diffBot', stamp, UID)
    return sigTools.dbExecution(statement, values)
def getFeedList():
    """Pick the single RSS feed with the lowest lastGrab value.

    Earlier variants of this query (random pick, never-grabbed feeds only,
    a fixed feedID) were superseded by ordering on lastGrab ascending.

    Returns:
        Element 2 of the ``sigTools.dbExecution`` result.
    """
    query = "select feedID, feedURL from RSSFeeds order by lastGrab Asc limit 1"
    outcome = sigTools.dbExecution(query, ())
    return outcome[2]
def updateGrabTime(feedID):
    """Stamp a feed's lastGrab column with the current local time.

    Args:
        feedID: identifier of the RSSFeeds row to update.

    Returns:
        The ``sigTools.dbExecution`` result for the UPDATE.
    """
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    statement = "update RSSFeeds set lastGrab = %s where feedID = %s"
    return sigTools.dbExecution(statement, (stamp, feedID))
def getFeedList():
    """Fetch feedID and feedURL of one feed, ordered by lastGrab ascending.

    The query takes no parameters; previously tried selections (random
    order, lastGrab is NULL, feedID = 1) are no longer used.

    Returns:
        Element 2 of the ``sigTools.dbExecution`` result.
    """
    noParams = ()
    statement = "select feedID, feedURL from RSSFeeds order by lastGrab Asc limit 1"
    return sigTools.dbExecution(statement, noParams)[2]
def updateLocation(UID, placeLat, placeLon, exactness):
    """Insert a nine-value row into Locations for the given UID.

    The row is, in order: UID, latitude, longitude, exactness, the tag
    'modWorkYHOO', the value of ``getDate()``, "Unassigned", an empty
    string and "Unassigned".  The INSERT is positional, so this order must
    match the table's column order.

    Returns:
        The ``sigTools.dbExecution`` result for the INSERT.
    """
    unassigned = "Unassigned"
    row = (
        UID,
        placeLat,
        placeLon,
        exactness,
        'modWorkYHOO',
        getDate(),
        unassigned,
        "",
        unassigned,
    )
    statement = "insert into Locations values (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
    return sigTools.dbExecution(statement, row)
def lookupWOEID(pageURL):
    """Look up the stored WOEID and coordinates for a page's source.

    The source key is derived from the URL with ``sigTools.getSource`` and
    echoed to stdout before the lookup.

    Args:
        pageURL: URL whose source should be looked up.

    Returns:
        The ``sigTools.dbExecution`` result for the SELECT on
        ``sourceLocales`` (siteWOEID, siteLat, siteLon).
    """
    polishedSrc = sigTools.getSource(pageURL)
    print(" " + polishedSrc)
    # One-element tuple: without the trailing comma, (polishedSrc) is just
    # the string itself rather than a parameter sequence.
    params = (polishedSrc,)
    query = "select siteWOEID, siteLat, siteLon from sourceLocales where siteSource = %s"
    return sigTools.dbExecution(query, params)
def appendEvent(UID, itemDate, itemTitle, itemDesc, itemURL):
    """Insert a feed item into URLs with Status 'NeedsLoc'.

    UDate and DateDated both receive the current local time, DateDate
    receives ``itemDate``, DateDater is 'RSSFeed', Origin is 'RSS-Feed'
    and UError is left empty.

    Returns:
        The ``sigTools.dbExecution`` result for the INSERT.
    """
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    statement = (
        "insert into URLs (UID, UDate, DateDate, DateDated, "
        "DateDater, UTitle, UError, UFirstP, URL, Origin, Status"
        ") values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    )
    # Values listed in the same order as the column list above.
    values = (
        UID,          # UID
        stamp,        # UDate
        itemDate,     # DateDate
        stamp,        # DateDated
        "RSSFeed",    # DateDater
        itemTitle,    # UTitle
        "",           # UError
        itemDesc,     # UFirstP
        itemURL,      # URL
        "RSS-Feed",   # Origin
        "NeedsLoc",   # Status
    )
    return sigTools.dbExecution(statement, values)
def appendEvent(UID, itemDate, itemTitle, itemDesc, itemURL):
    """Record one RSS item as a new URLs row awaiting a location.

    Args:
        UID: identifier for the new row.
        itemDate: value stored in DateDate.
        itemTitle: value stored in UTitle.
        itemDesc: value stored in UFirstP.
        itemURL: value stored in URL.

    Returns:
        The ``sigTools.dbExecution`` result for the INSERT.
    """
    insertedAt = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    dater, origin, status, blank = "RSSFeed", "RSS-Feed", 'NeedsLoc', ""

    statement = "insert into URLs (UID, UDate, DateDate, DateDated, "
    statement += "DateDater, UTitle, UError, UFirstP, URL, Origin, Status"
    statement += ") values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"

    # insertedAt fills both UDate and DateDated.
    values = (UID, insertedAt, itemDate, insertedAt, dater, itemTitle,
              blank, itemDesc, itemURL, origin, status)
    return sigTools.dbExecution(statement, values)
def dropOldLocations(UID):
    """Delete every Locations row belonging to the given UID.

    Args:
        UID: identifier whose location rows are removed.

    Returns:
        The ``sigTools.dbExecution`` result for the DELETE.
    """
    # (UID) is not a tuple; the trailing comma makes a one-element
    # parameter sequence.
    params = (UID,)
    query = "delete from Locations where UID = %s"
    return sigTools.dbExecution(query, params)
def updateStatus(UID, Status):
    """Set the Status column of the URLs row identified by UID.

    Args:
        UID: identifier of the row to update.
        Status: new status value.

    Returns:
        The ``sigTools.dbExecution`` result for the UPDATE.
    """
    statement = "update URLs set Status = %s where UID = %s"
    # Parameter order follows the placeholders: status first, then UID.
    return sigTools.dbExecution(statement, (Status, UID))
def dropOldResponses(UID):
    """Delete every apiResponses row belonging to the given UID.

    Args:
        UID: identifier whose stored API responses are removed.

    Returns:
        The ``sigTools.dbExecution`` result for the DELETE.
    """
    # Trailing comma needed: (UID) alone is just UID, not a 1-tuple.
    params = (UID,)
    query = "delete from apiResponses where UID = %s"
    return sigTools.dbExecution(query, params)
def generateTaskList():
    """Query UID and URL of every URLs row whose DateDate is all zeros.

    Returns:
        The ``sigTools.dbExecution`` result for the SELECT, unmodified.
    """
    query = "SELECT URLs.UID, URLs.URL FROM URLs WHERE URLs.DateDate LIKE '0000-00-00 00:00:00'"
    # The zero date is embedded in the SQL text, so no parameters are bound.
    return sigTools.dbExecution(query, ())
def main():
    """Fetch, geocode and archive every URL in the task list.

    For each row in element 2 of ``generateTaskList()``'s result (UID at
    index 0, URL at index 1):

    1. remove previously stored API responses for the UID;
    2. fetch the article through ``grabReadbility`` and split it with
       ``prepareArticleContent`` (index 2 is used as the page content,
       index 3 as the raw content passed to the geocoder);
    3. try to geocode it: on success replace the UID's Locations rows and
       clear its Status; on any failure set Status to ``LocationError``;
    4. archive the responses in ``apiResponses``.

    NOTE(review): step 4 is taken to run on both the success and the
    location-error path (the geocoder response is stored as None after an
    error) -- confirm against the original layout.

    An ``urllib2.HTTPError`` raised anywhere in the per-row work sets the
    row's Status to ``ErrorErrorError<code>`` and the loop continues.
    """
    taskList = generateTaskList()[2]
    for item in taskList:
        try:
            UID = item[0]
            pageURL = item[1]
            print(UID)
            print(" " + pageURL)
            print(" " + str(dropOldResponses(UID)))

            # Drop any non-ASCII bytes from the URL and the fetched body
            # before they are processed and stored.
            pageURL = pageURL.decode('us-ascii', 'ignore')
            readResponse = grabReadbility(pageURL)
            readResponse = readResponse.decode('us-ascii', 'ignore')

            articleDetails = prepareArticleContent(readResponse)
            pageContent = articleDetails[2]
            rawContent = articleDetails[3]

            try:
                # runGeocode yields three values; only the response itself
                # is needed here.
                geoResponse, _siteLat, _siteLon = runGeocode(pageURL, rawContent)
                locationList = extractLocation(geoResponse)
                placeLat = locationList[0]
                placeLon = locationList[1]
                exactness = locationList[2]
                print(" " + str(placeLat) + ", " + str(placeLon))
                print(" " + str(dropOldLocations(UID)))
                print(" " + str(updateLocation(UID, placeLat, placeLon, exactness)))
                # A None status marks the row as fully processed.
                print(" " + str(updateStatus(UID, None)))
            except Exception:
                # Best-effort geocoding: flag the row and still archive the
                # fetched article.  Catching Exception rather than using a
                # bare except keeps Ctrl-C / SystemExit able to stop the run.
                print(" Location Error")
                print(" " + str(updateStatus(UID, 'LocationError')))
                geoResponse = None

            currentDate = getDate()
            Data = (UID, pageURL, readResponse, pageContent, geoResponse, currentDate)
            CMD = "INSERT INTO apiResponses VALUES (%s, %s, %s, %s, %s, %s)"
            print(" " + str(sigTools.dbExecution(CMD, Data)))
        except urllib2.HTTPError as e:
            errorMsg = "ErrorErrorError" + str(e.code)
            Data = (errorMsg, UID)
            CMD = "UPDATE URLs SET Status = %s WHERE UID = %s"
            print(" " + str(sigTools.dbExecution(CMD, Data)))
            print(" HTTPError = " + str(e.code))
def updateSource(polishedSrc, UID):
    """Write the given source string to the USource column of one URLs row.

    Args:
        polishedSrc: value stored in USource.
        UID: identifier of the row to update.

    Returns:
        The ``sigTools.dbExecution`` result for the UPDATE.
    """
    statement = "update URLs set USource = %s where UID = %s"
    values = (polishedSrc, UID)
    return sigTools.dbExecution(statement, values)
def generateTaskList():
    """Query the URLs rows that have no usable USource value.

    A row matches when USource is empty (length below 1), is SQL NULL, or
    equals the literal text ``NULL``.

    Returns:
        The ``sigTools.dbExecution`` result for the SELECT, unmodified.
    """
    # The bound value is the four-character string "NULL", not SQL NULL.
    # The trailing comma makes it a one-element tuple; ('NULL') is only a
    # parenthesised string.
    params = ('NULL',)
    query = "select UID, URL from URLs where length(USource) < 1 or USource is NULL or USource = %s"
    return sigTools.dbExecution(query, params)