Beispiel #1
0
def sendErrorEmail(errorMessage, subject='Error Message'):
    """Email an error report to the configured recipients.

    The sender is read from the 'senderEmail' config parameter and the
    recipients from 'receiversEmail', a comma-delimited string. Nothing is
    sent when the message, subject, sender or recipient list is empty.

    errorMessage: body text of the email.
    subject: subject line; defaults to 'Error Message'.
    """
    sender = getConfigParameters('senderEmail')
    recipientsCommaDelimited = getConfigParameters('receiversEmail')

    # str.split() never returns an empty list (''.split(', ') == ['']), so
    # the old length check could not detect a blank setting. Split on the
    # bare comma, trim each entry and drop blanks: this accepts both
    # "a@x, b@y" and "a@x,b@y" and yields [] for an empty setting.
    recipients = [
        address.strip() for address in recipientsCommaDelimited.split(',')
    ]
    recipients = [address for address in recipients if address]

    if errorMessage and subject and recipients and sender:
        sendEmail(sender, recipients, subject, errorMessage)
Beispiel #2
0
def sendErrorEmail(errorMessage, subject='Error Message'):
    """Send an error notification to every configured receiver.

    Reads the sender from the 'senderEmail' config parameter and the
    comma-delimited receiver list from 'receiversEmail', then hands the
    message to sendEmail(). The call is a no-op when any of the message,
    subject, sender or receiver list is empty.

    errorMessage: body text of the email.
    subject: subject line; defaults to 'Error Message'.
    """
    sender = getConfigParameters('senderEmail')
    recipientsCommaDelimited = getConfigParameters('receiversEmail')

    # Splitting on ', ' always produced at least one element (possibly ''),
    # which made the emptiness check below meaningless, and it failed to
    # separate addresses written without a space after the comma.
    recipients = []
    for address in recipientsCommaDelimited.split(','):
        address = address.strip()
        if address:
            recipients.append(address)

    if not (errorMessage and subject and recipients and sender):
        return

    sendEmail(sender, recipients, subject, errorMessage)
Beispiel #3
0
def sendEmail(sender, receiversArray, subject, message):
    """Send a plain-text email through the configured SMTP server.

    The server is read from the 'mailServer' config parameter. Nothing is
    sent when any argument or the server setting is empty. Delivery errors
    are reported on stdout and not re-raised.

    sender: address placed in the From header and used as envelope sender.
    receiversArray: iterable of recipient addresses.
    subject: subject line.
    message: body text; the From/To/Subject headers are prepended here.
    """
    mailServer = getConfigParameters('mailServer')
    if not (sender and receiversArray and subject and message
            and mailServer):
        return

    toString = ','.join(receiversArray)
    message = ('From:' + sender + '\n' + 'To:' + toString + '\n' +
               'Subject:' + subject + '\n\n' + message)

    try:
        # The context manager sends QUIT and closes the socket even when
        # sendmail() raises; the connection used to be left open.
        with smtplib.SMTP(mailServer) as smtpObj:
            smtpObj.sendmail(sender, receiversArray, message)
        print("Successfully sent email")
    except Exception:
        print("Error: unable to send email")
        # print_exc() writes the traceback itself and returns None, so it
        # must not be wrapped in print() (that emitted a stray "None").
        traceback.print_exc(limit=2, file=sys.stdout)
Beispiel #4
0
def isThisURLWithinNominationDifferential(URL, tweetDateTime):
    """Decide whether a nominated URL may be processed again.

    URL: the nominated URL.
    tweetDateTime: nomination time as a '%Y-%m-%d %H:%M:%S' string.

    Returns a (flag, dayCount) tuple:
      * (False, 0) when either argument is blank;
      * (True, 0) when isPosted() does not report the URL as posted;
      * otherwise flag is True only if more than 'nominationDifferential'
        days separate the post time and the tweet time, and dayCount is the
        absolute gap between that setting and the elapsed days.
    Any exception while comparing dates is printed and yields flag False.
    """
    timestampFormat = '%Y-%m-%d %H:%M:%S'
    dateDiff = 0

    URL = URL.strip()
    tweetDateTime = tweetDateTime.strip()
    if len(URL) == 0 or len(tweetDateTime) == 0:
        return False, dateDiff

    URL = getCanonicalUrl(URL)
    URLHash = getFormattedTagURL(URL)

    # never posted before: nothing to wait for
    if isPosted(URLHash) != 1:
        return True, dateDiff

    returnValue = False
    try:
        maxDays = getConfigParameters('nominationDifferential')

        # keep only the first two space-separated fields (date and time)
        postStampParts = getPostDateTime(URLHash).split(' ')
        datePostedFromTumblr = datetime.strptime(
            postStampParts[0] + ' ' + postStampParts[1], timestampFormat)

        # NOTE(review): the converted value is not used by the comparison
        # below; the call is kept so behaviour is unchanged.
        localTimeTweet = datetime_from_utc_to_local(datePostedFromTumblr)

        tweetDateTime = datetime.strptime(tweetDateTime, timestampFormat)

        elapsed = tweetDateTime - datePostedFromTumblr
        dateDiff = int(str(elapsed.days))

        returnValue = True if dateDiff > maxDays else False
        dateDiff = abs(maxDays - dateDiff)
    except:
        returnValue = False
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        print((fname, exc_tb.tb_lineno, sys.exc_info()))

    return returnValue, dateDiff
Beispiel #5
0
def main():
    """Post queued items to Tumblr and notify, unless the run limit is hit.

    The limit comes from the 'maxTimesToRunTSE' config parameter and is
    checked by checkRunCount() against the run-count file.
    """
    runLimit = getConfigParameters('maxTimesToRunTSE')

    if not checkRunCount(runLimit, '.', runCountFileName):
        print('...runCount exceeded')
        return

    # post to queue, create status update message to modify line in file
    postToTumblrQueue()
    notifyOnPostApproved()
    print('...DONE')
Beispiel #6
0
def main():
    """Extract requests from Twitter, then start the screenshot script.

    The interpreter path is read from the 'pythonVirtualEnv1Path' config
    parameter; the script is launched through the shell.
    """
    extractRequestsFromTwitter()

    print('...calling usingTimelapseToTakeScreenShots.py')

    interpreter = getConfigParameters('pythonVirtualEnv1Path')
    scriptPath = globalPrefix + 'usingTimelapseToTakeScreenShots.py'

    # '-u' asks Python for unbuffered output; the trailing '&' makes the
    # shell run the script in the background.
    os.system(interpreter + ' -u ' + scriptPath + ' &')
Beispiel #7
0
def takeScreenshots(dictionaryOfItems,
                    folderName,
                    urlsFile,
                    resolutionX='1024',
                    resolutionY='768'):
    """Screenshot and watermark every memento in dictionaryOfItems.

    dictionaryOfItems: mapping of year key -> sequence whose element 0 is
        the memento URL and element 1 the first watermark text.
    folderName: folder (relative to globalPrefix) receiving '<key>.png'.
    urlsFile: writable file object; one '<key>: <url>' line is appended
        per screenshot taken.
    resolutionX, resolutionY: passed through to the takeScreenshot.js script.

    Returns False when dictionaryOfItems is empty, True otherwise. A failure
    on one memento is printed and does not stop the remaining ones.
    """
    if len(dictionaryOfItems) == 0:
        return False

    for yearKey in sorted(dictionaryOfItems):
        # Reset per iteration: the error report below used to read an
        # unbound name (crashing inside the handler) or a stale URL from the
        # previous memento when the lookup itself failed.
        urlValue = None
        try:
            urlValue = dictionaryOfItems[yearKey][0]

            puppeteerScript = os.path.join(
                os.path.dirname(__file__),
                globalPrefix + 'takeScreenshot.js')
            nodeSystemPath = getConfigParameters('nodeSystemPath')
            call([
                nodeSystemPath, puppeteerScript, urlValue, resolutionX,
                resolutionY, folderName,
                str(yearKey)
            ])

            urlsFile.write(str(yearKey) + ': ' + urlValue + '\n')

            imagePath = os.path.join(
                os.path.dirname(__file__),
                globalPrefix + folderName + '/' + str(yearKey) + '.png')
            font = os.path.join(os.path.dirname(__file__),
                                globalPrefix + 'LiberationSerif.ttf')
            addWatermark(imagePath, dictionaryOfItems[yearKey][1], font,
                         20, 640)

            # Second watermark: scheme-less prefix of the memento URL.
            # NOTE(review): the class [a-zA-z] also admits the punctuation
            # between 'Z' and 'a' and excludes digits; left untouched so the
            # rendered watermark text does not change - confirm intent.
            archive = re.findall(r'(^https?:\/\/([a-zA-z]|\.|\-)+)',
                                 urlValue)
            archive = re.sub(r'^https?:\/\/', "", archive[0][0])
            addWatermark(imagePath, archive, font, 20, 675)
        except Exception:
            print("Error occured when processing the following memento")
            print(urlValue)

    return True
Beispiel #8
0
def sendEmail(sender, receiversArray, subject, message):
	"""Send a plain-text email through the configured SMTP server.

	The server is read from the 'mailServer' config parameter. Nothing is
	sent when any argument or the server setting is empty. Delivery errors
	are reported on stdout and not re-raised.

	sender: address placed in the From header and used as envelope sender.
	receiversArray: iterable of recipient addresses.
	subject: subject line.
	message: body text; the From/To/Subject headers are prepended here.
	"""
	mailServer = getConfigParameters('mailServer')
	if not (sender and receiversArray and subject and message and mailServer):
		return

	toString = ','.join(receiversArray)
	message = 'From:' + sender + '\n' + 'To:' + toString + '\n' + 'Subject:' + subject + '\n\n' + message

	try:
		smtpObj = smtplib.SMTP(mailServer)
		try:
			smtpObj.sendmail(sender, receiversArray, message)
		finally:
			# Always release the connection; it used to be left open.
			try:
				smtpObj.quit()
			except smtplib.SMTPException:
				smtpObj.close()
		# Single-argument print(...) behaves the same on Python 2 and 3.
		print("Successfully sent email")
	except Exception:
		print("Error: unable to send email")
		# print_exc() writes the traceback itself and returns None, so it is
		# not wrapped in print (that emitted a stray "None"). It also avoids
		# the deprecated sys.exc_type / exc_value / exc_traceback globals.
		traceback.print_exc(limit=2, file=sys.stdout)
Beispiel #9
0
import re
import wikipedia
from random import randrange
from tinytag import TinyTag
from dateutil import parser

from subprocess import call
from os import walk

from surt import handyurl
from surt.IAURLCanonicalizer import canonicalize

from getConfig import getConfigParameters
from sendEmail import sendErrorEmail

# Prefix read from the 'globalPrefix' config parameter at import time.
globalPrefix = getConfigParameters('globalPrefix')
# Separator token; presumably placed between a memento URL and its datetime
# (inferred from the name only) - TODO confirm against the code that uses it.
globalMementoUrlDateTimeDelimeter = "*+*+*"
# Bare file names of the pending-request file and of its "store" counterpart.
# NOTE(review): unlike other modules these are not prefixed with globalPrefix
# here - confirm whether callers prepend it.
globalRequestFilename = "twitter_requests_wdill.txt"
processedRequestFilename = "twitter_requests_wdill_store.txt"
#deprecated
#globalDataFileName = '/home/anwala/wsdl/projects/timelapse/webshots/tumblrUrlsDataFile.txt'
'''
	assumption: entries are delimited by newline
	format LANL:
	<http://www.webcitation.org/64ta04WpM>; rel="first memento"; datetime="Mon, 23 Jan 2012 02:01:29 GMT",
	<http://www.webcitation.org/6ChPDhqw8>; rel="memento"; datetime="Thu, 06 Dec 2012 03:45:27 GMT",
	<http://www.webcitation.org/6ChPHTKJY>; rel="last memento"; datetime="Thu, 06 Dec 2012 03:46:23 GMT"

	format CS:
	, <http://www.webcitation.org/64ta04WpM>; rel="first memento"; datetime="Mon, 23 Jan 2012 02:01:29 GMT",
	, <http://www.webcitation.org/6ChPDhqw8>; rel="memento"; datetime="Thu, 06 Dec 2012 03:45:27 GMT",
Beispiel #10
0
def uploadAnimatedGifToSocialMedia(folderName, URL, queueOrPublish='queue'):
    """Upload a folder's media file to Instagram and then to Tumblr.

    folderName: folder (relative to globalPrefix) holding the media.
    URL: the URL the media was generated for; used to build the tags.
    queueOrPublish: Tumblr post state, either 'queue' or 'publish'.

    The file uploaded is '<folderName>WithAudio.mp4' when present, otherwise
    the first '.png' that os.listdir() returns for the folder.

    Returns (postID, beginYear, endYear, instagramLink) on success, or
    (-1, '', '', '') when an argument is blank, queueOrPublish is not one of
    the two accepted values, or the folder holds no uploadable file.
    """
    notUploaded = (-1, '', '', '')

    folderName = folderName.strip()
    URL = URL.strip()
    if (not folderName or not URL
            or queueOrPublish not in ('queue', 'publish')):
        return notUploaded

    tldURL = getFolderNameFromUrlOld2(URL)

    #TAG-MODIFICATION-ZONE
    canonicalURL = getFormattedTagURL(getCanonicalUrl(URL))

    if canonicalURL != tldURL:
        tags = canonicalURL + ',' + tldURL
    else:
        tags = canonicalURL

    links = getLinks(folderName)

    # First line of the links text. The previous links[:links.find('\n')]
    # silently dropped the last character when there was no newline at all
    # (find() returns -1).
    firstline = links.split('\n')[0]

    beginYear, endYear = extractBeginAndEndYear(firstline)

    mediaFolder = globalPrefix + folderName + '/'
    mp4Filename = mediaFolder + folderName + 'WithAudio.mp4'
    uploadFile = ""
    if os.path.exists(mp4Filename):
        uploadFile = mp4Filename
    else:
        for item in os.listdir(mediaFolder):
            if item.endswith(".png"):
                uploadFile = mediaFolder + item
                break

    if not uploadFile:
        return notUploaded

    # instagram currently doesn't support posting videos via a web browser,
    # so the upload goes through the BrowserStack helper script instead.
    instaScript = os.path.join(
        os.path.dirname(__file__),
        globalPrefix + 'instagramWithBrowserStack.py')
    username = getConfigParameters('instagramUsername')
    password = getConfigParameters('instagramPassword')
    browserStackUserID = getConfigParameters('browserStackUserID')
    browserStackKey = getConfigParameters('browserStackKey')
    # raises IndexError when no .apk exists under globalPrefix
    instaAppPath = glob.glob(globalPrefix + "*.apk")[0]

    print("...uploading to Instagram")
    pythonVirtualEnvPath = getConfigParameters('pythonVirtualEnv1Path')
    instaCaption = firstline + " #memento"

    # NOTE(review): the credentials are passed as command-line arguments and
    # are therefore visible in the process list while the helper runs.
    res = subprocess.check_output([
        pythonVirtualEnvPath, instaScript, username, password,
        browserStackUserID, browserStackKey, instaAppPath, uploadFile,
        instaCaption
    ])

    # The helper prints '... Instagram Link: <url>'; keep the part after the
    # last marker, drop any '/?...' query, scheme and leading 'www.'.
    instagramLink = res.decode('utf-8')
    instagramLink = instagramLink.replace('\n', "")
    instagramLink = instagramLink.split('Instagram Link: ')[-1]
    # raw string: same pattern, without invalid-escape warnings
    instagramLink = re.sub(r'^https?:\/\/(www\.)?', '',
                           instagramLink.split('/?')[0])
    print(instagramLink)

    print("...uploading to tumblr")
    if uploadFile.endswith(".mp4"):
        postID = client.create_video(globalBlogName,
                                     tags=[tags],
                                     state=queueOrPublish,
                                     caption=[links],
                                     data=uploadFile)
    else:
        postID = client.create_photo(globalBlogName,
                                     tags=[tags],
                                     state=queueOrPublish,
                                     caption=[links],
                                     data=uploadFile)

    return postID['id'], beginYear, endYear, instagramLink
Beispiel #11
0
import glob
import re
import time
from datetime import datetime

from bs4 import BeautifulSoup
import requests
import subprocess
import hashlib
from random import randint

from timelapse import getCanonicalUrl
from timelapse import getFolderNameFromUrlOld2
from getConfig import getConfigParameters

# Settings read from configuration at import time.
globalBlogName = getConfigParameters('globalBlogName')
globalPrefix = getConfigParameters('globalPrefix')
# File under globalPrefix; presumably holds the status-update message
# templates (inferred from the name) - TODO confirm.
messageSuiteFileName = globalPrefix + 'statusUpdateMessageSuite.txt'

# Authenticate via OAuth
tumblrConsumerKey = getConfigParameters('tumblrConsumerKey')
tumblrConsumerSecret = getConfigParameters('tumblrConsumerSecret')
tumblrAccessToken = getConfigParameters('tumblrAccessToken')
tumblrAccessTokenSecret = getConfigParameters('tumblrAccessTokenSecret')

# Module-wide Tumblr REST client built from the four credentials above.
# NOTE(review): no 'import pytumblr' is visible among the imports shown for
# this module - confirm it is imported elsewhere.
client = pytumblr.TumblrRestClient(tumblrConsumerKey, tumblrConsumerSecret,
                                   tumblrAccessToken, tumblrAccessTokenSecret)
'''
	input:canonicalURL
'''
Beispiel #12
0
def isThisURLWithinNominationDifferential_old(URL, tweetDateTime):
    """Older, file-based check of whether a nominated URL may be re-posted.

    URL: the nominated URL.
    tweetDateTime: nomination time as a '%Y-%m-%d %H:%M:%S' string.

    Returns a (flag, dayCount) tuple:
      * (False, 0) when either argument is blank;
      * (True, 0) when isPosted() does not report the URL as posted;
      * otherwise the Tumblr data file is scanned for a line whose first
        ', '-separated field matches the URL; flag is True only if more
        than 'nominationDifferential' days separate that line's third field
        and the tweet time, and dayCount is the setting minus the elapsed
        days. Without a matching line the result is (False, 0).
    Any exception is printed and yields flag False.
    """
    dateDiff = 0
    returnValue = False

    URL = URL.strip()
    tweetDateTime = tweetDateTime.strip()
    if not URL or not tweetDateTime:
        return returnValue, dateDiff

    URL = getCanonicalUrl(URL)
    URLHash = getHash(URL)

    # never posted before: nothing to wait for
    if isPosted(URLHash) != 1:
        return True, dateDiff

    try:
        maxDays = getConfigParameters('nominationDifferential')

        # 'with' closes the file even if reading raises; the handle used to
        # leak in that case.
        with open(tumblrDataFileName, 'r') as inputFile:
            lines = inputFile.readlines()

        for line in lines:

            urlPostIDdateTimePosted = line.strip().split(', ')
            urlPosted = getCanonicalUrl(urlPostIDdateTimePosted[0].strip())
            urlPosted = urlPosted.strip()

            # normalise both sides: lower-case the nominated URL and make
            # sure each ends with exactly one trailing slash before comparing
            URL = URL.lower()
            if URL[-1] != '/':
                URL = URL + '/'
            if urlPosted[-1] != '/':
                urlPosted = urlPosted + '/'

            if URL != urlPosted:
                continue

            datePosted = datetime.strptime(urlPostIDdateTimePosted[2],
                                           '%Y-%m-%d %H:%M:%S')
            tweetDateTime = datetime.strptime(tweetDateTime,
                                              '%Y-%m-%d %H:%M:%S')

            dateDiff = (tweetDateTime - datePosted).days

            if dateDiff > maxDays:
                returnValue = True

            dateDiff = maxDays - dateDiff
            break

    except Exception:
        # narrowed from a bare except so KeyboardInterrupt/SystemExit pass
        returnValue = False
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        print((fname, exc_tb.tb_lineno, sys.exc_info()))

    return returnValue, dateDiff
Beispiel #13
0
def getRequestUrls():

	sinceIDValue = ''
	sinceIDFilename = globalPrefix + 'sinceID.txt'
	try:
		print 'f:', sinceIDFilename

		sinceIDFile = open(sinceIDFilename, 'r')
		prevSinceIDFile = open(globalPrefix + 'prev_sinceID.txt', 'w')

		line = sinceIDFile.readline()

		if(len(line) > 1):
			sinceIDValue = long(line)
		else:
			sinceIDValue = long('0')

		prevSinceIDFile.write(str(sinceIDValue) + '\n')

		sinceIDFile.close()
		prevSinceIDFile.close()
	except:
		exc_type, exc_obj, exc_tb = sys.exc_info()
		fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
		print(fname, exc_tb.tb_lineno, sys.exc_info() )
		sinceIDValue = long('0')


	#get spam filter coeff.
	spamFilterCoeff = getConfigParameters('spamFilterCoeff')
	print 'spamFilterCoeff', spamFilterCoeff

	requestsRemaining = 0
	try:
		requestsRemaining = api.rate_limit_status()['resources']['search']['/search/tweets']['remaining']
	except:
		requestsRemaining = 0
		#m1-handle exception
		exc_type, exc_obj, exc_tb = sys.exc_info()
		fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
		print(fname, exc_tb.tb_lineno, sys.exc_info() )

		errorMessage = (fname, exc_tb.tb_lineno, sys.exc_info() )
		sendErrorEmail( str(errorMessage) )


	#requestsRemaining = 10
	print "Before Request remaining: ", requestsRemaining


	if( requestsRemaining > 0 ):
		#<user, expandedUrlsArray>
		twitterUsersDict = {}


		#assume initially tweet is present
		isTweetPresentFlag = True
		while( isTweetPresentFlag ):
			
			#if tweet is present this will change the False to True, else it will remain False and Stop the loop
			isTweetPresentFlag = False
			print "current sinceIDValue: ", sinceIDValue
			#m1-handle exception
			try:
				for tweet in tweepy.Cursor(api.search, q="%23icanhazmemento", since_id=sinceIDValue).items(30):
					print
					isTweetPresentFlag = True

					# From 2015-07-12 18:45:11
					# To   Sun, 12 Jul 2015 14:45:11 GMT
					localTweetDatetime = datetime_from_utc_to_local(tweet.created_at)
					#localTweetDatetime = tweet.created_at
					localTweetDatetime = localTweetDatetime.strftime('%a, %d %b %Y %H:%M:%S')
					localTweetDatetime = str(localTweetDatetime) + ' GMT'


					#update since_id
					if( tweet.id > sinceIDValue ):
						sinceIDValue = tweet.id

					#MOD
					print localTweetDatetime, ",tweet_id:", tweet.id, ",", tweet.user.screen_name, " - ", tweet.text

					#get urls from tweet - start
					#since even though access to none short url, still meant that
					#url has to be chased down until the final value, no need to access none
					#short url
					shortTwitterUrls = []
					if( tweet.text.find('#icanhazmemento') != -1 ):
						for shortURL in tweet.entities['urls']:
							#print 'n: ', shortURL['expanded_url']
							shortTwitterUrls.append(shortURL['url'])
					#get urls from tweet - end


					#if this tweet is in response to a parent tweet with link(s) - start
					if( tweet.in_reply_to_status_id is not None and len(shortTwitterUrls) == 0):
						print 'parent ID:', tweet.in_reply_to_status_id

						parentTweet = api.get_status(tweet.in_reply_to_status_id)
						print 'parent tweet:', parentTweet.text
						for shortURL in parentTweet.entities['urls']:
							#print 'n: ', shortURL['expanded_url']
							shortTwitterUrls.append(shortURL['url'])
					#if this tweet is in response to a parent tweet with link(s) - end

					if(len(shortTwitterUrls) != 0):
						for url in shortTwitterUrls:
							potentialExpandedUrl = expandUrl(url)

							if( len(potentialExpandedUrl) > 0 ):

								#url normalization - start
								if( potentialExpandedUrl[-1] == '/' ):
									potentialExpandedUrl = potentialExpandedUrl[:-1]
								#url normalization - end

								#create new entry for user since user is not in dictionary
								print '...potentialExpandedUrl:', potentialExpandedUrl
								potentialExpandedUrl = potentialExpandedUrl.strip()

								#note spam filter not implemented since twitter blocks spam
								twitterUsersDict[tweet.id] = {}
								twitterUsersDict[tweet.id]['screen_name'] = tweet.user.screen_name
								twitterUsersDict[tweet.id]['expandedUrl'] = potentialExpandedUrl
								twitterUsersDict[tweet.id]['create_datetime'] = localTweetDatetime
						
								'''
								#faulty logic
								if( tweet.user.screen_name in twitterUsersDict):
									
									
									#spam filter measure - start
									if( len(twitterUsersDict[tweet.user.screen_name]) < spamFilterCoeff ):
										twitterUsersDict[tweet.user.screen_name]['potentialExpandedUrlsArray'].append(potentialExpandedUrl)
									#spam filter measure - end
								else:
									#twitterUsersDict[tweet.user.screen_name] = [potentialExpandedUrl]

									twitterUsersDict[tweet.user.screen_name] = {}

									twitterUsersDict[tweet.user.screen_name]['potentialExpandedUrlsArray'] = [potentialExpandedUrl]
									twitterUsersDict[tweet.user.screen_name]['create_datetime'] = localTweetDatetime
									twitterUsersDict[tweet.user.screen_name]['tweet_id'] = tweet.id
								'''
			except:
				exc_type, exc_obj, exc_tb = sys.exc_info()
				fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
				print(fname, exc_tb.tb_lineno, sys.exc_info() )

				errorMessage = (fname, exc_tb.tb_lineno, sys.exc_info() )
				sendErrorEmail( str(errorMessage) )
			
			if( isTweetPresentFlag ):
				print '...sleeping for 15 seconds'
				time.sleep(15)
	try:
		#MOD
		sinceIDFile = open(sinceIDFilename, 'w')
		
		#print 'DEBUG CAUTION, sinceIDValue SET'
		#print
		#sinceIDValue = 630131997084622848

		sinceIDFile.write(str(sinceIDValue) + '\n')
		sinceIDFile.close()
		
	except:
		exc_type, exc_obj, exc_tb = sys.exc_info()
		fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
		print(fname, exc_tb.tb_lineno, sys.exc_info() )

	return twitterUsersDict
Beispiel #14
0
import tweepy
import time
import os, sys
import commands

from common import datetime_from_utc_to_local
from common import getOrSetArchive
from common import expandUrl

from getConfig import getConfigParameters
from sendEmail import sendErrorEmail


# Consumer keys and access tokens, used for OAuth
# (all four are read from configuration at import time)
consumer_key = getConfigParameters('twitterConsumerKey')
consumer_secret = getConfigParameters('twitterConsumerSecret')
access_token = getConfigParameters('twitterAccessToken')
access_token_secret = getConfigParameters('twitterAccessTokenSecret')

# OAuth process, using the keys and tokens
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# Creation of the actual interface, using authentication
api = tweepy.API(auth)
# Prefix read from the 'globalPrefix' config parameter.
globalPrefix = getConfigParameters('globalPrefix')


#gets tweets with since id larger (means tweet is newer) than the previous since id 
#updates since id with largest tweet sinceID
def getRequestUrls():
Beispiel #15
0
import os, sys
from timelapse import timelapse
from getConfig import getConfigParameters

globalPrefix = getConfigParameters('globalPrefix')
globalInputUrlsFileName = globalPrefix + 'twitter_requests_wdill.txt'
# Load every nomination line up front. An unreadable file is reported and
# treated as "no nominations": previously the handler called close() on a
# name that is unbound when open() fails, the file was never closed on
# success, and nominationTuples was left undefined for the code below.
nominationTuples = []
try:
    with open(globalInputUrlsFileName, "r") as listOfNominatedURLs:
        nominationTuples = listOfNominatedURLs.readlines()
except Exception:
    exc_type, exc_obj, exc_tb = sys.exc_info()
    fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
    print((fname, exc_tb.tb_lineno, sys.exc_info()))

print("read lines: ", len(nominationTuples))
originalPath = os.getcwd()

for i in range(0, len(nominationTuples)):

    #<0: URL, 1: SCREEN-NAME, 2: DATE-TIME, 3: TWEET-ID, 4: (optional POST-ID, 5: POST-FLAG) >
    nominationData = nominationTuples[i].split(' <> ')

    #this url has not been published
    if (len(nominationData) < 6):

        URL = nominationData[0].strip()
        #screen_name = nomimationData[1].strip()
        #datetime = nomimationData[2].strip()
Beispiel #16
0
from timelapseTwitter import updateStatus
from timelapseTwitter import updateStatusWithMedia
from timelapseTwitter import isThisURLWithinNominationDifferential

from common import getHashString
from common import getFormattedTagURL
from common import extractBeginAndEndYear
from common import getLinks
from common import uploadAnimatedGifToSocialMedia
from common import isPosted
from common import getPageTitle
from common import getRandomStatusUpdateMessage
from common import getPostID
from common import getPostDateTime

# Settings read from configuration at import time.
globalBlogName = getConfigParameters('globalBlogName')
globalPrefix = getConfigParameters('globalPrefix')

# Files this module works with, all located under globalPrefix.
# runCountFileName: presumably the run counter for this script - TODO confirm.
runCountFileName = globalPrefix + 'runCountTSE.txt'
# Twitter request file and its "store" counterpart.
globalDataFileName = globalPrefix + 'twitter_requests_wdill.txt'
globalDataStoreFileName = globalPrefix + 'twitter_requests_wdill_store.txt'
debugOutputFileName = globalPrefix + 'debugOutputFile.txt'


def makeStatusUpdateAndNotifyReferrer(twitterStatusUpdateMessage,
                                      screen_nameOfUserWhoSuggestedUri,
                                      tweet_id, URL, link, instagramLink,
                                      filename):

    modifyEntryFlag = False
    twitterStatusUpdateMessage = twitterStatusUpdateMessage.strip()
Beispiel #17
0
def getMementosPages(url):
    """Fetch the timemap pages listing the mementos of url.

    The aggregator named by the 'mementoAggregator' config parameter is
    probed with a HEAD request; unless it answers 200 the one named by
    'latentMementoAggregator' is used instead. An aggregator whose address
    contains 'cs.odu.edu' is treated as "CS", anything else as "LANL".

    Payload formats:
      * CS:   [memento, ..., memento, timemap1]; timemap1 points to the
              next page, which is followed until no such link remains.
      * LANL: JSON holding either a 'timemap_index' list (each entry points
              to a list of mementos) or, for small payloads, 'mementos'
              directly. The link-format timemap is fetched after reading
              the JSON because downstream code expects the link format.

    url: the URL to look up; surrounding whitespace is ignored.

    Returns a list of page texts, empty when url is blank. In the CS case a
    non-200 response contributes an empty string and ends the paging.
    """
    pages = []
    url = url.strip()
    if not url:
        return pages

    firstChoiceAggregator = getConfigParameters('mementoAggregator')
    timemapPrefix = firstChoiceAggregator + url

    #select an aggregator - start
    # The probe used to be "curl --silent -I <timemapPrefix>" executed through
    # a shell, with the caller-supplied url concatenated into the command
    # line (shell injection). requests.head() issues the same HEAD request
    # without a shell and, like curl -I, does not follow redirects.
    try:
        firstChoiceIsUp = requests.head(timemapPrefix).status_code == 200
    except requests.exceptions.RequestException:
        # an unreachable first choice counts as "did not answer 200"
        firstChoiceIsUp = False

    if not firstChoiceIsUp:
        firstChoiceAggregator = getConfigParameters('latentMementoAggregator')
        timemapPrefix = firstChoiceAggregator + url

    if firstChoiceAggregator.find('cs.odu.edu') > -1:
        aggregatorSelector = 'CS'
    else:
        aggregatorSelector = 'LANL'

    # single-argument print(...) emits the same text on Python 2 and 3
    print('...using aggregator: ' + aggregatorSelector)
    #select an aggregator - end

    if aggregatorSelector == 'CS':
        while True:
            page = ''
            r = requests.get(timemapPrefix)
            print('status code: ' + str(r.status_code))
            if r.status_code == 200:
                page = r.text

            pages.append(page)
            indexOfRelTimemapMarker = page.rfind('>;rel="timemap"')
            if indexOfRelTimemapMarker == -1:
                break

            # next page: the text between the closest preceding '<' and the
            # marker, e.g. <http://.../timemap/link/10001/http://www.cnn.com>;rel="timemap"
            # (rfind returns -1 when no '<' exists, giving a slice from 0)
            startOfLink = page.rfind('<', 0, indexOfRelTimemapMarker) + 1
            timemapPrefix = page[startOfLink:indexOfRelTimemapMarker]

        return pages

    #LANL Aggregator
    page = ''
    r = requests.get(timemapPrefix)
    if r.status_code == 200:
        page = r.text

    try:
        payload = json.loads(page)

        if 'timemap_index' in payload:

            for timemap in payload['timemap_index']:
                timemapLink = timemap['uri'].replace('/timemap/json/',
                                                     '/timemap/link/')
                r = requests.get(timemapLink)
                if r.status_code == 200:
                    pages.append(r.text)

        elif 'mementos' in payload:
            #untested block
            timemapLink = payload['timemap_uri']['json_format'].replace(
                '/timemap/json/', '/timemap/link/')

            print('timemap: ' + timemapLink)
            r = requests.get(timemapLink)
            if r.status_code == 200:
                pages.append(r.text)

    except Exception:
        # e.g. an empty or non-JSON payload; report and return what we have
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        print((fname, exc_tb.tb_lineno, sys.exc_info()))

    return pages
Beispiel #18
0
from urllib.parse import urlparse
import re
from dateutil import parser

from sendEmail import sendEmail, sendErrorEmail
from timelapse import getCanonicalUrl
from getConfig import getConfigParameters

from common import getFormattedTagURL
from common import isPosted
from common import getPostDateTime
from common import getPageTitle
from common import datetime_from_utc_to_local

# Consumer keys and access tokens, used for OAuth
# (all four are read from configuration at import time)
consumer_key = getConfigParameters('twitterConsumerKey')
consumer_secret = getConfigParameters('twitterConsumerSecret')
access_token = getConfigParameters('twitterAccessToken')
access_token_secret = getConfigParameters('twitterAccessTokenSecret')

# OAuth process, using the keys and tokens
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# Creation of the actual interface, using authentication
# NOTE(review): no 'import tweepy' is visible among the imports shown for
# this module - confirm it is imported elsewhere.
api = tweepy.API(auth)

# Screen name and request hashtag, both read from configuration.
whatDidItLookLikeTwitterScreenName = getConfigParameters(
    'whatDidItLookLikeTwitterScreenName')
whatDidItLookLikeTwitterRequestHashtag = getConfigParameters(
    'whatDidItLookLikeTwitterRequestHashtag')