def generateTaskList():
    """Fetch up to 60 of the newest URL rows whose status matches ``NeedsLoc``.

    Only rows whose ``USource`` appears in ``sourceLocales.siteSource`` are
    selected, ordered by ``UDate`` descending.

    Returns:
        The result of ``sigTools.dbExecution`` for the query, unmodified.
    """
    # A one-element tuple needs the trailing comma; ("NeedsLoc") is just a
    # parenthesised string, unlike the tuples passed by the other queries.
    params = ("NeedsLoc",)
    query = (
        "SELECT UID, URL from URLs WHERE Status like %s "
        "and USource in (select siteSource from sourceLocales) "
        "ORDER BY UDate DESC LIMIT 60"
    )
    return sigTools.dbExecution(query, params)
Esempio n. 2
0
def main():
    """Date each pending URL from the ``date`` field of its fetched JSON.

    For every ``(UID, URL)`` row from ``buildList()``: fetch the page with
    ``grabContent``, parse the response as JSON, parse its ``date`` value and
    store it through ``updateEntry``.  If any of those steps fails, the row's
    ``Status`` is set to ``NeedHumanWork`` instead.
    """
    for item in buildList():
        UID = item[0]
        pageURL = item[1]
        print(UID)

        try:
            readResponse = grabContent(pageURL)
            data = json.loads(readResponse)
            # Expected form, e.g. "Tue, 15 Apr 2014 10:30:00 GMT".
            finalDate = datetime.strptime(data['date'],
                                          "%a, %d %b %Y %H:%M:%S %Z")
            updateResult = updateEntry(finalDate, UID)
            print("      " + str(finalDate))
            print("      " + str(updateResult))
        except Exception:
            # Still a deliberate catch-all for fetch/parse/DB failures, but
            # unlike a bare ``except:`` it lets KeyboardInterrupt and
            # SystemExit stop the run.
            Data = ("NeedHumanWork", UID)
            CMD = "UPDATE URLs SET Status = %s WHERE UID = %s"
            flagResult = sigTools.dbExecution(CMD, Data)
            print("      Error: " + str(flagResult))
Esempio n. 3
0
def buildList():
    """Return up to 8 zero-dated URL rows whose status is bound to ``None``.

    Selects ``UID`` and ``URL`` from ``URLs`` where ``DateDate`` matches
    ``0000-00-00%`` and ``Status is`` the bound ``None`` parameter.

    Returns:
        Element ``[2]`` of the ``sigTools.dbExecution`` result.
    """
    query = "select UID, URL from URLs where DateDate like %s and Status is %s limit 8"
    params = ("0000-00-00%", None)
    outcome = sigTools.dbExecution(query, params)
    return outcome[2]
Esempio n. 4
0
def main():
    """Date pending URLs from a date found inside the URL itself.

    Walks element ``[2]`` of the ``generateTaskList()`` result, asks
    ``findDateInURL`` for candidate dates and, when at least one is found,
    writes the first candidate to the row's ``DateDate`` with ``DateDater``
    set to ``Bot-URL`` and ``DateDated`` set to the current local time.
    """
    taskResult = generateTaskList()

    for item in taskResult[2]:
        UID = item[0]
        # Un-escape HTML-encoded ampersands before searching the URL; the
        # previous replace("&", "&") replaced the text with itself.
        pageURL = item[1].replace("&amp;", "&")

        cleanDateList = findDateInURL(pageURL)
        if not cleanDateList:
            # No date candidates in this URL; leave the row untouched.
            continue

        print(UID + " :: " + pageURL)
        print("  " + str(cleanDateList))
        finalDate = cleanDateList[0]

        try:
            now = datetime.datetime.now()
            currentDate = now.strftime("%Y-%m-%d %H:%M:%S")
            Data = (finalDate, 'Bot-URL', currentDate, UID)
            CMD = "UPDATE URLs SET DateDate = %s, DateDater = %s, DateDated = %s WHERE UID = %s"
            updateResult = sigTools.dbExecution(CMD, Data)
            print("  " + str(updateResult))
        except Exception:
            # Best-effort per row, but unlike a bare ``except:`` this does
            # not swallow KeyboardInterrupt / SystemExit.
            print("Bad Entry -- " + UID)
Esempio n. 5
0
def buildList():
    """Query ``URLs`` for at most 8 rows that still carry a zero date.

    The ``DateDate`` column is matched against ``0000-00-00%`` and the
    ``Status`` column is compared (``is``) against a bound ``None``.

    Returns:
        Element ``[2]`` of whatever ``sigTools.dbExecution`` returns.
    """
    bindings = ("0000-00-00%", None)
    statement = "select UID, URL from URLs where DateDate like %s and Status is %s limit 8"
    return sigTools.dbExecution(statement, bindings)[2]
Esempio n. 6
0
def generateTaskList():
    """Select the 60 most recent ``NeedsLoc`` URLs that have a known source.

    A row qualifies when its ``Status`` matches ``NeedsLoc`` and its
    ``USource`` is listed in ``sourceLocales.siteSource``; results are
    ordered by ``UDate`` descending.

    Returns:
        The unmodified result of ``sigTools.dbExecution``.
    """
    # ("NeedsLoc") without a trailing comma is a str, not a tuple; pass a
    # real one-element tuple like the multi-parameter queries do.
    params = ("NeedsLoc",)
    query = (
        "SELECT UID, URL from URLs WHERE Status like %s "
        "and USource in (select siteSource from sourceLocales) "
        "ORDER BY UDate DESC LIMIT 60"
    )
    return sigTools.dbExecution(query, params)
Esempio n. 7
0
def main():
    """Fill in dates for the rows returned by ``buildList()``.

    Each row is ``(UID, URL)``.  The URL is fetched with ``grabContent``, the
    response is decoded as JSON, and its ``date`` entry is parsed and handed
    to ``updateEntry``.  When any step raises, the row's ``Status`` is set to
    ``NeedHumanWork``.
    """
    for item in buildList():
        UID = item[0]
        pageURL = item[1]
        print(UID)

        try:
            readResponse = grabContent(pageURL)
            data = json.loads(readResponse)
            pageDate = data['date']
            # Format matches strings such as "Tue, 15 Apr 2014 10:30:00 GMT".
            finalDate = datetime.strptime(pageDate, "%a, %d %b %Y %H:%M:%S %Z")
            updateResult = updateEntry(finalDate, UID)
            print("      " + str(finalDate))
            print("      " + str(updateResult))
        except Exception:
            # Catch-all on purpose (network, JSON, date and DB errors all
            # land here), narrowed from a bare ``except:`` so Ctrl-C and
            # SystemExit still propagate.
            Data = ("NeedHumanWork", UID)
            CMD = "UPDATE URLs SET Status = %s WHERE UID = %s"
            flagResult = sigTools.dbExecution(CMD, Data)
            print("      Error: " + str(flagResult))
Esempio n. 8
0
def main():
    """Assign dates to task rows using a date embedded in each URL.

    Iterates element ``[2]`` of the ``generateTaskList()`` result.  For each
    ``(UID, URL)`` row, ``findDateInURL`` supplies candidate dates; the first
    one is written to ``DateDate`` with ``DateDater`` ``Bot-URL`` and the
    current local time as ``DateDated``.  Rows without candidates are skipped.
    """
    taskResult = generateTaskList()

    for item in taskResult[2]:
        UID = item[0]
        # Decode HTML-escaped ampersands; replace("&", "&") as previously
        # written changed nothing.
        pageURL = item[1].replace("&amp;", "&")

        cleanDateList = findDateInURL(pageURL)
        if not cleanDateList:
            continue

        print(UID + " :: " + pageURL)
        print("  " + str(cleanDateList))
        finalDate = cleanDateList[0]

        try:
            now = datetime.datetime.now()
            currentDate = now.strftime("%Y-%m-%d %H:%M:%S")
            Data = (finalDate, 'Bot-URL', currentDate, UID)
            CMD = "UPDATE URLs SET DateDate = %s, DateDater = %s, DateDated = %s WHERE UID = %s"
            updateResult = sigTools.dbExecution(CMD, Data)
            print("  " + str(updateResult))
        except Exception:
            # Keep going on a bad row, but let KeyboardInterrupt and
            # SystemExit through (a bare ``except:`` would swallow them).
            print("Bad Entry -- " + UID)
Esempio n. 9
0
def updateEntry(finalDate, UID):
    """Write a date to a ``URLs`` row, recording ``diffBot`` as the dater.

    Args:
        finalDate: Value stored in the ``DateDate`` column.
        UID: Identifier of the row to update.

    Returns:
        The result of ``sigTools.dbExecution`` for the UPDATE statement.
    """
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    query = "UPDATE URLs SET DateDate = %s, DateDater = %s, DateDated = %s WHERE UID = %s"
    return sigTools.dbExecution(query, (finalDate, 'diffBot', stamp, UID))
Esempio n. 10
0
def updateEntry(finalDate, UID):
    """Update ``DateDate``, ``DateDater`` and ``DateDated`` for one URL row.

    ``DateDater`` is always ``diffBot`` and ``DateDated`` is the current
    local time formatted as ``YYYY-MM-DD HH:MM:SS``.

    Args:
        finalDate: New ``DateDate`` value.
        UID: Key of the ``URLs`` row.

    Returns:
        Whatever ``sigTools.dbExecution`` returns.
    """
    statement = "UPDATE URLs SET DateDate = %s, DateDater = %s, DateDated = %s WHERE UID = %s"
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    bindings = (finalDate, 'diffBot', timestamp, UID)
    outcome = sigTools.dbExecution(statement, bindings)
    return outcome
def getFeedList():
    """Pick the single RSS feed that sorts first by ascending ``lastGrab``.

    Returns:
        Element ``[2]`` of the ``sigTools.dbExecution`` result for a
        one-row query selecting ``feedID`` and ``feedURL`` from ``RSSFeeds``.
    """
    query = "select feedID, feedURL from RSSFeeds order by lastGrab Asc limit 1"
    outcome = sigTools.dbExecution(query, ())
    return outcome[2]
def updateGrabTime(feedID):
    """Stamp a feed's ``lastGrab`` column with the current local time.

    Args:
        feedID: Identifier of the ``RSSFeeds`` row to update.

    Returns:
        The result of ``sigTools.dbExecution`` for the UPDATE.
    """
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    query = "update RSSFeeds set lastGrab = %s where feedID = %s"
    return sigTools.dbExecution(query, (stamp, feedID))
Esempio n. 13
0
def updateGrabTime(feedID):
    """Record "now" as the ``lastGrab`` time of the given feed.

    The timestamp is the local time formatted as ``YYYY-MM-DD HH:MM:SS``.

    Args:
        feedID: Key of the ``RSSFeeds`` row.

    Returns:
        Whatever ``sigTools.dbExecution`` returns.
    """
    statement = "update RSSFeeds set lastGrab = %s where feedID = %s"
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    bindings = (timestamp, feedID)
    outcome = sigTools.dbExecution(statement, bindings)
    return outcome
Esempio n. 14
0
def getFeedList():
    """Return the row data for the feed with the lowest-sorting ``lastGrab``.

    Runs a parameterless query limited to one row of ``RSSFeeds``.

    Returns:
        Element ``[2]`` of whatever ``sigTools.dbExecution`` returns.
    """
    no_bindings = ()
    statement = "select feedID, feedURL from RSSFeeds order by lastGrab Asc limit 1"
    return sigTools.dbExecution(statement, no_bindings)[2]
def updateLocation(UID, placeLat, placeLon, exactness):
    """Insert a nine-column row into ``Locations`` for the given UID.

    The row carries the coordinates and exactness, the tag ``modWorkYHOO``,
    the value of ``getDate()``, and the fillers ``Unassigned`` / empty
    string / ``Unassigned`` for the last three columns.

    Args:
        UID: Identifier stored in the first column.
        placeLat: Latitude value.
        placeLon: Longitude value.
        exactness: Exactness value stored alongside the coordinates.

    Returns:
        The result of ``sigTools.dbExecution`` for the INSERT.
    """
    stamp = getDate()
    filler = "Unassigned"
    row = (
        UID, placeLat, placeLon, exactness,
        'modWorkYHOO', stamp,
        filler, "", filler,
    )
    query = "insert into Locations values (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
    return sigTools.dbExecution(query, row)
def lookupWOEID(pageURL):
    """Look up the WOEID and coordinates stored for a page's source site.

    The source key is derived with ``sigTools.getSource(pageURL)`` and
    printed before the lookup.

    Args:
        pageURL: URL whose source is looked up in ``sourceLocales``.

    Returns:
        The result of ``sigTools.dbExecution`` for a query selecting
        ``siteWOEID``, ``siteLat`` and ``siteLon``.
    """
    polishedSrc = sigTools.getSource(pageURL)
    print("   " + polishedSrc)

    # (polishedSrc) is only a parenthesised string; the trailing comma makes
    # it the one-element tuple that matches the other queries' parameters.
    params = (polishedSrc,)
    query = "select siteWOEID, siteLat, siteLon from sourceLocales where siteSource = %s"
    return sigTools.dbExecution(query, params)
Esempio n. 17
0
def lookupWOEID(pageURL):
    """Fetch ``siteWOEID``, ``siteLat`` and ``siteLon`` for a URL's source.

    Args:
        pageURL: URL passed to ``sigTools.getSource`` to obtain the value
            matched against ``sourceLocales.siteSource``.

    Returns:
        The unmodified result of ``sigTools.dbExecution``.
    """
    polishedSrc = sigTools.getSource(pageURL)
    print("   " + polishedSrc)

    # Pass a genuine one-element tuple; without the trailing comma the
    # parentheses are a no-op and a bare string is handed to the DB layer.
    params = (polishedSrc,)
    query = "select siteWOEID, siteLat, siteLon from sourceLocales where siteSource = %s"
    return sigTools.dbExecution(query, params)
Esempio n. 18
0
def updateLocation(UID, placeLat, placeLon, exactness):
    """Add a ``Locations`` row for ``UID`` tagged ``modWorkYHOO``.

    Besides the four arguments, the row stores the value of ``getDate()``
    and three filler columns: ``Unassigned``, an empty string and
    ``Unassigned``.

    Args:
        UID: Identifier of the item being located.
        placeLat: Latitude to store.
        placeLon: Longitude to store.
        exactness: Exactness value to store.

    Returns:
        Whatever ``sigTools.dbExecution`` returns for the INSERT.
    """
    timestamp = getDate()
    unassigned = "Unassigned"
    statement = "insert into Locations values (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
    bindings = (UID, placeLat, placeLon, exactness, 'modWorkYHOO',
                timestamp, unassigned, "", unassigned)
    outcome = sigTools.dbExecution(statement, bindings)
    return outcome
def appendEvent(UID, itemDate, itemTitle, itemDesc, itemURL):
    """Insert an RSS item into ``URLs`` with status ``NeedsLoc``.

    ``UDate`` and ``DateDated`` both receive the current local time,
    ``DateDater`` is ``RSSFeed``, ``Origin`` is ``RSS-Feed`` and ``UError``
    is left as an empty string.

    Args:
        UID: Identifier for the new row.
        itemDate: Value stored in ``DateDate``.
        itemTitle: Value stored in ``UTitle``.
        itemDesc: Value stored in ``UFirstP``.
        itemURL: Value stored in ``URL``.

    Returns:
        The result of ``sigTools.dbExecution`` for the INSERT.
    """
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    columns = (
        "UID, UDate, DateDate, DateDated, "
        "DateDater, UTitle, UError, UFirstP, URL, Origin, Status"
    )
    query = (
        "insert into URLs (" + columns
        + ") values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    )
    values = (
        UID, stamp, itemDate, stamp,
        "RSSFeed", itemTitle, "", itemDesc, itemURL,
        "RSS-Feed", "NeedsLoc",
    )
    return sigTools.dbExecution(query, values)
Esempio n. 20
0
def appendEvent(UID, itemDate, itemTitle, itemDesc, itemURL):
    """Store a feed item as a new ``URLs`` row awaiting location (``NeedsLoc``).

    The current local time is written to both ``UDate`` and ``DateDated``;
    ``DateDater`` is ``RSSFeed``, ``Origin`` is ``RSS-Feed`` and ``UError``
    is the empty string.

    Args:
        UID: Identifier for the new row.
        itemDate: Stored as ``DateDate``.
        itemTitle: Stored as ``UTitle``.
        itemDesc: Stored as ``UFirstP``.
        itemURL: Stored as ``URL``.

    Returns:
        Whatever ``sigTools.dbExecution`` returns.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    statement = "".join([
        "insert into URLs (UID, UDate, DateDate, DateDated, ",
        "DateDater, UTitle, UError, UFirstP, URL, Origin, Status",
        ") values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
    ])
    bindings = (UID, timestamp, itemDate, timestamp, "RSSFeed", itemTitle,
                "", itemDesc, itemURL, "RSS-Feed", 'NeedsLoc')
    outcome = sigTools.dbExecution(statement, bindings)
    return outcome
Esempio n. 21
0
def dropOldLocations(UID):
    """Delete every ``Locations`` row belonging to ``UID``.

    Args:
        UID: Identifier whose location rows are removed.

    Returns:
        The result of ``sigTools.dbExecution`` for the DELETE.
    """
    # (UID) alone is just UID in parentheses; the trailing comma builds the
    # one-element parameter tuple the multi-parameter queries already use.
    params = (UID,)
    query = "delete from Locations where UID = %s"
    return sigTools.dbExecution(query, params)
Esempio n. 22
0
def updateStatus(UID, Status):
    """Set the ``Status`` column of one ``URLs`` row.

    Args:
        UID: Identifier of the row to update.
        Status: New status value (callers in this file also pass ``None``).

    Returns:
        The result of ``sigTools.dbExecution`` for the UPDATE.
    """
    query = "update URLs set Status = %s where UID = %s"
    return sigTools.dbExecution(query, (Status, UID))
Esempio n. 23
0
def dropOldResponses(UID):
    """Delete every ``apiResponses`` row belonging to ``UID``.

    Args:
        UID: Identifier whose stored API responses are removed.

    Returns:
        The result of ``sigTools.dbExecution`` for the DELETE.
    """
    # Trailing comma makes this a real one-element tuple; (UID) by itself
    # is not a tuple.
    params = (UID,)
    query = "delete from apiResponses where UID = %s"
    return sigTools.dbExecution(query, params)
Esempio n. 24
0
def generateTaskList():
    """Select ``UID`` and ``URL`` of every row whose date is still all zeros.

    The filter is ``URLs.DateDate LIKE '0000-00-00 00:00:00'``; no parameters
    are bound.

    Returns:
        The unmodified result of ``sigTools.dbExecution``.
    """
    query = "SELECT URLs.UID, URLs.URL FROM URLs WHERE URLs.DateDate LIKE '0000-00-00 00:00:00'"
    return sigTools.dbExecution(query, ())
Esempio n. 25
0
def main():
    """Re-fetch, geocode and archive every task from ``generateTaskList()``.

    For each ``(UID, URL)`` row in element ``[2]`` of the task result:

    1. delete the row's stored API responses,
    2. fetch the article with ``grabReadbility`` and split it with
       ``prepareArticleContent``,
    3. geocode it and replace its ``Locations`` entry, clearing ``Status``;
       if that fails, set ``Status`` to ``LocationError`` instead,
    4. insert the raw responses into ``apiResponses``.

    A ``urllib2.HTTPError`` anywhere in a row's processing sets the row's
    ``Status`` to ``ErrorErrorError`` followed by the HTTP status code.
    """
    taskList = generateTaskList()[2]

    for item in taskList:
        try:
            UID = item[0]
            pageURL = item[1]

            print(UID)
            print("   " + pageURL)

            print("   " + str(dropOldResponses(UID)))

            # Drop any non-ASCII bytes from both the URL and the response.
            pageURL = pageURL.decode('us-ascii', 'ignore')
            readResponse = grabReadbility(pageURL).decode('us-ascii', 'ignore')

            # Only elements 2 and 3 are needed here; elements 0 and 1 were
            # previously bound to unused headline/excerpt locals.
            articleDetails = prepareArticleContent(readResponse)
            pageContent = articleDetails[2]
            rawContent = articleDetails[3]

            try:
                # runGeocode returns three values; the last two are unused.
                geoResponse, _siteLat, _siteLon = runGeocode(pageURL, rawContent)

                locationList = extractLocation(geoResponse)
                placeLat = locationList[0]
                placeLon = locationList[1]
                exactness = locationList[2]

                print("   " + str(placeLat) + ", " + str(placeLon))

                print("   " + str(dropOldLocations(UID)))
                print("   " + str(updateLocation(UID, placeLat, placeLon, exactness)))

                # Clear the status by binding None.
                print("   " + str(updateStatus(UID, None)))

            except Exception:
                # Any geocoding/location failure is recorded on the row and
                # processing continues; narrowed from a bare ``except:`` so
                # KeyboardInterrupt / SystemExit are not swallowed.
                print("   Location Error")
                print("   " + str(updateStatus(UID, 'LocationError')))
                geoResponse = None

            Data = (UID, pageURL, readResponse, pageContent, geoResponse,
                    getDate())
            CMD = "INSERT INTO apiResponses VALUES (%s, %s, %s, %s, %s, %s)"
            print("   " + str(sigTools.dbExecution(CMD, Data)))

        except urllib2.HTTPError as e:
            errorMsg = "ErrorErrorError" + str(e.code)
            Data = (errorMsg, UID)
            CMD = "UPDATE URLs SET Status = %s WHERE UID = %s"
            print("   " + str(sigTools.dbExecution(CMD, Data)))
            print("   HTTPError = " + str(e.code))
Esempio n. 26
0
def updateSource(polishedSrc, UID):
    """Set the ``USource`` column of one ``URLs`` row.

    Args:
        polishedSrc: Value written to ``USource``.
        UID: Identifier of the row to update.

    Returns:
        The result of ``sigTools.dbExecution`` for the UPDATE.
    """
    query = "update URLs set USource = %s where UID = %s"
    return sigTools.dbExecution(query, (polishedSrc, UID))
Esempio n. 27
0
def generateTaskList():
    """Select URL rows that have no usable ``USource`` value.

    A row qualifies when ``USource`` is shorter than one character, is SQL
    NULL, or equals the literal string ``NULL``.

    Returns:
        The unmodified result of ``sigTools.dbExecution``.
    """
    # ('NULL') is a plain string; the trailing comma makes it the one-element
    # tuple expected for a single %s placeholder.
    params = ('NULL',)
    query = "select UID, URL from URLs where length(USource) < 1 or USource is NULL or USource = %s"
    return sigTools.dbExecution(query, params)
def main():
    """Process every task row: fetch the article, geocode it, archive results.

    Rows come from element ``[2]`` of ``generateTaskList()`` as
    ``(UID, URL)``.  Per row, old ``apiResponses`` entries are dropped, the
    article is fetched through ``grabReadbility`` and parsed by
    ``prepareArticleContent``, a location is geocoded and stored (or
    ``Status`` becomes ``LocationError`` when that fails), and the responses
    are inserted into ``apiResponses``.

    If a ``urllib2.HTTPError`` is raised while handling a row, its ``Status``
    is set to ``ErrorErrorError`` plus the HTTP status code.
    """
    taskList = generateTaskList()[2]

    for item in taskList:
        try:
            UID = item[0]
            pageURL = item[1]

            print(UID)
            print("   " + pageURL)

            print("   " + str(dropOldResponses(UID)))

            # Non-ASCII bytes are discarded from the URL and the response.
            pageURL = pageURL.decode('us-ascii', 'ignore')
            readResponse = grabReadbility(pageURL).decode('us-ascii', 'ignore')

            # Elements 0 and 1 of the details were only ever bound to unused
            # headline/excerpt locals, so just 2 and 3 are read here.
            articleDetails = prepareArticleContent(readResponse)
            pageContent = articleDetails[2]
            rawContent = articleDetails[3]

            try:
                # Three values come back; only the first is used below.
                geoResponse, _siteLat, _siteLon = runGeocode(pageURL, rawContent)

                locationList = extractLocation(geoResponse)
                placeLat = locationList[0]
                placeLon = locationList[1]
                exactness = locationList[2]

                print("   " + str(placeLat) + ", " + str(placeLon))

                print("   " + str(dropOldLocations(UID)))
                print("   " + str(updateLocation(UID, placeLat, placeLon, exactness)))

                # Binding None clears the row's status.
                print("   " + str(updateStatus(UID, None)))

            except Exception:
                # Record the failure and carry on with the archive step.
                # ``except Exception`` (not a bare ``except:``) keeps Ctrl-C
                # and SystemExit working.
                print("   Location Error")
                print("   " + str(updateStatus(UID, 'LocationError')))
                geoResponse = None

            Data = (UID, pageURL, readResponse, pageContent, geoResponse, getDate())
            CMD = "INSERT INTO apiResponses VALUES (%s, %s, %s, %s, %s, %s)"
            print("   " + str(sigTools.dbExecution(CMD, Data)))

        except urllib2.HTTPError as e:
            errorMsg = "ErrorErrorError" + str(e.code)
            Data = (errorMsg, UID)
            CMD = "UPDATE URLs SET Status = %s WHERE UID = %s"
            print("   " + str(sigTools.dbExecution(CMD, Data)))
            print("   HTTPError = " + str(e.code))
def updateStatus(UID, Status):
    """Write ``Status`` to the ``URLs`` row identified by ``UID``.

    Args:
        UID: Key of the row to update.
        Status: Value bound to the ``Status`` column.

    Returns:
        Whatever ``sigTools.dbExecution`` returns.
    """
    statement = "update URLs set Status = %s where UID = %s"
    bindings = (Status, UID)
    outcome = sigTools.dbExecution(statement, bindings)
    return outcome
def dropOldResponses(UID):
    """Remove all ``apiResponses`` rows stored for ``UID``.

    Args:
        UID: Identifier whose rows are deleted.

    Returns:
        Whatever ``sigTools.dbExecution`` returns for the DELETE.
    """
    # Parentheses alone do not make a tuple; the comma does.
    params = (UID,)
    query = "delete from apiResponses where UID = %s"
    return sigTools.dbExecution(query, params)
def dropOldLocations(UID):
    """Remove all ``Locations`` rows stored for ``UID``.

    Args:
        UID: Identifier whose rows are deleted.

    Returns:
        Whatever ``sigTools.dbExecution`` returns for the DELETE.
    """
    # Parentheses alone do not make a tuple; the comma does.
    params = (UID,)
    query = "delete from Locations where UID = %s"
    return sigTools.dbExecution(query, params)
Esempio n. 32
0
def generateTaskList():
    """List the ``UID``/``URL`` pairs of rows whose ``DateDate`` is zeroed.

    Runs a parameterless query filtering on
    ``URLs.DateDate LIKE '0000-00-00 00:00:00'``.

    Returns:
        Whatever ``sigTools.dbExecution`` returns.
    """
    no_bindings = ()
    statement = "SELECT URLs.UID, URLs.URL FROM URLs WHERE URLs.DateDate LIKE '0000-00-00 00:00:00'"
    outcome = sigTools.dbExecution(statement, no_bindings)
    return outcome