Example #1
def get_twitters():

	twitters = []
	tags = request.args.get('hashtags', '').replace('#','%23').replace('@','%40').replace(' ','').split(',') # default to '' so a missing parameter doesn't crash .replace()

	try:
		if tags and tags[0]: # split(',') on an empty string still yields [''], so check for content
			search_settings = TwitterSearchOrder() # create a TwitterSearchOrder object
			search_settings.set_include_entities(False) # and don't give us all those entity information
			search_settings.set_keywords( tags ) # let's define all words we would like to have a look for

			# it's about time to create a TwitterSearch object with our secret tokens
			search_on_twitter = TwitterSearch(
				consumer_key = APP_KEY,
				consumer_secret = APP_SECRET,
				access_token = OAUTH_TOKEN,
				access_token_secret = OAUTH_TOKEN_SECRET
			)
			# this is where the fun actually starts :)
			for twitter in search_on_twitter.search_tweets_iterable(search_settings):
				tw = {
					'text': twitter['text'],
					'profile_image_url': twitter['user']['profile_image_url'],
					'name': twitter['user']['name'],
					'screen_name': twitter['user']['screen_name'],
					'location': twitter['user']['location']
				}
				if tw not in twitters:
					twitters.append( tw )
			# return json
			return json.dumps(twitters)
		else:
			return json.dumps([])
			
	except TwitterSearchException as e: # take care of all those ugly errors if there are some
		return json.dumps([])
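A minimal sketch of how this handler might be wired up, assuming a Flask app (suggested by the use of request.args and the JSON response); the route path is hypothetical:

from flask import Flask

app = Flask(__name__)
app.add_url_rule('/twitters', view_func=get_twitters)  # hypothetical route for the handler above
# GET /twitters?hashtags=%23python,%40flask would then return the JSON list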
Example #2
    def __init__(self, consumer_key, consumer_secret, access_token, access_token_secret, keyword, since_id, tweet_count=15):
        self._results = []
        self._i = 0

        print("since_id: {0}".format(since_id))
        try:
            tso = TwitterSearchOrder()
            tso.set_keywords([keyword])
            tso.set_language('ja')
            tso.set_include_entities(False)
            tso.set_count(tweet_count)
            if since_id > 0:
                tso.set_since_id(int(since_id))  # int() also covers what Python 2 called long

            ts = TwitterSearch(
                consumer_key = consumer_key,
                consumer_secret = consumer_secret,
                access_token = access_token,
                access_token_secret = access_token_secret
             )

            for tweet in ts.search_tweets_iterable(tso):
                self._results.append(
                    {
                        'screen_name': tweet['user']['screen_name'],
                        'user_name': tweet['user']['name'],
                        'profile_image_url': tweet['user']['profile_image_url'],
                        'text': tweet['text'],
                        'created_at': tweet['created_at'],
                        'id': tweet['id']
                    }
                )

        except TwitterSearchException as e:
            print(e)
Example #3
class TweetSearch:
    def __init__(self):
        self.ts = TwitterSearch(
                consumer_key = 'uVp0jLzC043jvVxsoYtO7XnTy',
                consumer_secret = 'zHHqf6gaRGeLX9PS4YB4BMhcUo7p8dyI02cZLxVQOTnoHEG0gh', 
                access_token = '247768860-1BdrGZgXQibjaDSiZxGQ1MbjCxCEsM85gDFnRMjr',
                access_token_secret = 'ImetdaaKxq4uMvkQiMIxbGiR92ywqjYas52EZSXOyPu1t')
               # consumer_key = 'zTY2l3OYf9n50WgPG6KOCcr3J',
               # consumer_secret = 'sHqr1o1bCmW5xqPQE6wA7wCwsti00kT6hDnM6SlHNIr2kqStiJ', 
               # access_token = '597976696-zDOpw9mCLkJ05JKXemq9OAJ1qf6pjVg0G4zhtCrl',
               # access_token_secret = 'lmiwWH69u5MfDGWNhXaFlcyo4882uN2Fm7dYxcAPVPaAq')

    def search(self,keywords):
        print(keywords)
        tso = TwitterSearchOrder()
        tso.set_keywords(keywords)
        tso.set_language('en')
        tso.set_include_entities(False)
        tweets = None
        try:
            tweets = self.ts.search_tweets_iterable(tso)
            print(tweets)
        except TwitterSearchException as e: # catch all those ugly errors
            print(e)
        return tweets
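Note that search_tweets_iterable returns a lazy iterable: the try/except above only guards its creation, and page requests (with any TwitterSearchException) happen while the caller loops. A minimal usage sketch with hypothetical keywords, assuming the same TwitterSearch imports as the snippet:

searcher = TweetSearch()
results = searcher.search(['python', 'twitter'])
if results is not None:
    try:
        for tweet in results:  # API pages are fetched lazily here
            print(tweet['text'])
    except TwitterSearchException as e:
        print(e)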
Example #4
def jobInteraction(tag):

    tags = tag.split(",")
    tweets = []

    try:
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.set_keywords(tags)  # let's define all words we would like to have a look for
        tso.set_language('es')  # we want to see Spanish tweets only
        tso.set_include_entities(True)  # and include the entity information

        # it's about time to create a TwitterSearch object with our secret tokens
        ts = TwitterSearch(
            # consumer_key='QVhq5HVolTuzE79c16YDegtia',
            # consumer_secret='bfSPCAKXca52eaa2GF4a4mGceYVy4VkENwSuQtnr2c9e34TgWq',
            # access_token='1196870581-DfDo1GQQaukRZQBWn72ugdATSJqKPb4EaMsOFRK',
            # access_token_secret='tRV1lizrsCj8maKxOkzcDvp6vGJLBgDXH0ueEzmXSQTOi'
            consumer_key='gDEFFAToqZ1j5cE9SgJkeqvBY',
            consumer_secret='jqKGAra9Kd0n4jwsQXkhairyxx0uv9D4iMme6AeE2NLDX3fPfz',
            access_token='17160146-FxfSx4Bdq7SvuENSgHvi175f7uyjwoHCHVMUYiJQP',
            access_token_secret='SREyq0DxHOurUY5E0AbT3kPDwl5IFDcPFmnehZjbaH5ab'
        )

        # this is where the fun actually starts :)
        for tweet in ts.search_tweets_iterable(tso):
            # print('@%s tweeted: %s' % (tweet['user']['screen_name'], tweet['text']))
            tweets.append(tweet)

    except TwitterSearchException as e:  # take care of all those ugly errors if there are some
        print(e)

    print(len(tweets))

    return json.dumps(tweets)
Example #5
def search(query='cheeky nandos ledge banter', max=5):
    keywords = query.split()
    try:
        tso = TwitterSearchOrder()
        tso.set_keywords(keywords)
        # tso.set_language('en')
        # tso.set_include_entities(False)

        ts = TwitterSearch(
            consumer_key=app.config['TWITTER_CONSUMER_KEY'],
            consumer_secret=app.config['TWITTER_CONSUMER_SECRET'],
            access_token=app.config['TWITTER_ACCESS_TOKEN'],
            access_token_secret=app.config['TWITTER_TOKEN_SECRET']
        )
        results = []
        for tweet in ts.search_tweets_iterable(tso):
            results.append(tweet['id'])
            # print( '@%s tweeted: %s' % ( tweet['user']['screen_name'], tweet['text'] ) )
            max -= 1
            if not max: break
        # print results
        return results

    except TwitterSearchException as e:  # take care of all those ugly errors if there are some
        print(e)
Example #6
def search_results(words):

    #auth = OAuthHandler(keys.consumer_key, keys.consumer_secret)
    #auth.set_access_token(keys.access_token, keys.access_token_secret)

    try:

        tso = TwitterSearchOrder()
        tso.set_keywords(words)

        rilo_twitter = TwitterSearch(
            consumer_key = keys.consumer_key,
            consumer_secret = keys.consumer_secret,
            access_token = keys.access_token,
            access_token_secret = keys.access_token_secret
            )

        for tweet in rilo_twitter.search_tweets_iterable(tso):
            if tweet['retweet_count'] > 150:
                #API.retweet(tweet['id'])
                print('@%s tweeted:  %s' % (tweet['user']['screen_name'], tweet['text']))
                print('\n\n\n')

    except TwitterSearchException as e:
        print (e)
Example #7
def search():

    tw = []
    try:
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.set_keywords([" "])  # let's define all search keywords - now, we want all tweets with a space in them
        tso.set_language("en")  # we want to see english tweets only
        tso.set_include_entities(False)  # and don't give us all those entity information
        ts = TwitterSearch(
            consumer_key="zg9yQTGTT2oizk3XLMHGLzfpJ",
            consumer_secret="nmiwqRpWDX0oxTCUTro8sPeUVUXIZHW9O1VZcTb0mLyfHw51sc",
            access_token="700001043-oxm3LZ72y4WmWGRqY66QjV0SzZoHGy5OGgwic26M",
            access_token_secret="hGJZWTb5bjGFSiuIQrff5UajKdlyXcp7Lyun5SJzq05Su",
        )
        i = 0
        for tweet in ts.search_tweets_iterable(tso):
            # if (tweet['retweet_count'] != 0):
            # tw.append((len(tweet['text'].split()), tweet['retweet_count']))
            # print(str(i))
            # backspace(len(str(i)))
            tw.append(tweet)
            if i == 300:
                break
            i += 1
        return tw
        # print tw
        # print( '%s: @%s tweeted: %s' % ( tweet['retweet_count'], tweet['user']['screen_name'], tweet['text'] ) )
        # print # of retweents, tweeter, and content of tweet
    except TwitterSearchException as e:  # take care of all those ugly errors if there are some
        print (e)
Example #8
def search(text,limit):
	tweets_list = []
	try:
	    tso = TwitterSearchOrder() # create a TwitterSearchOrder object
	    tso.set_keywords(text.split()) # set_keywords expects a list, so split the query string
	    tso.set_language('en') # we want to see English tweets only
	    tso.set_include_entities(False) # and don't give us all those entity information

	    # it's about time to create a TwitterSearch object with our secret tokens
	    ts = TwitterSearch(
		consumer_key = 	'tbHIo3PImh0pSIETLlO8wIKj4',
		consumer_secret = 'QmzJYSAp9rw6O7tDJATkm7Avq0OBRTfZbdNf3BjEmDmdDB1jT2',
		access_token = '1315897358-IkDrUD4Zdy6HP3FjF4UxdBqICEZOU91Lys95FGu',
		access_token_secret = 'nHROttog8743ZmeBWeldvh24EHwXtW4h1Z69o1GsgV2zE'
	     )

	    # this is where the fun actually starts :)
	    cnt = 0
	    for tweet in ts.search_tweets_iterable(tso):
	        cnt += 1
	        if cnt > limit:
	            break
	        tweets_list.append(tweet['text'])
	        #print( '@%s tweeted: %s' % ( tweet['user']['screen_name'], tweet['text'] ) )
	    print('%d tweets' % cnt)

	except TwitterSearchException as e: # take care of all those ugly errors if there are some
	    print(e)
	return tweets_list
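A hedged usage sketch of the function above; the query and limit are made up:

# collect up to 100 tweet texts for a hypothetical query
for text in search('python flask', 100):
    print(text)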
Example #9
 def serve_twitter_news(self):
     try:
         tso = TwitterSearchOrder()
         tso.set_keywords([topic])
         tso.set_language('en')
         tso.set_include_entities(False)
         
         ts = TwitterSearch(
             consumer_key = my_consumer_key,
             consumer_secret = my_consumer_secret,
             access_token = my_access_token,
             access_token_secret = my_access_token_secret
          )
          
         counter = 0
         batch_size = 5
         updates = []
         
         for tweet in ts.search_tweets_iterable(tso):
             update = '@%s: %s' % ( tweet['user']['screen_name'].encode('utf-8').strip(), tweet['text'].encode('utf-8').strip() )
             updates.append(update)
             logging.debug(update)
             counter += 1
             if counter >= batch_size:
                 self.send_JSON({ 'update' : updates })
                 break
     except TwitterSearchException as e:
         pass
Example #10
def twitterStreaming():
    from time import sleep
    sleep(5)
    try:
        # it's about time to create a TwitterSearch object with our secret tokens
        tso = TwitterSearchOrder() # create a TwitterSearchOrder object
        tso.set_keywords(['Swissquote']) # let's define all words we would like to have a look for
        tso.set_language('en') # we want to see English tweets only
        tso.set_include_entities(False) # and don't give us all those entity information
        lastID = 569803155141206016
        tso.set_since_id(lastID)        
        ts = TwitterSearch(
            consumer_key = 'a',
            consumer_secret = 'a',
            access_token = 'a-a',
            access_token_secret = 'b'
        )
        for tweet in ts.search_tweets_iterable(tso):
            print( '[%s]@%s tweeted: %s' % ( tweet['created_at'], tweet['user']['screen_name'], tweet['text'] ) )
            if(lastID < tweet['id']):
                lastID = tweet['id']
            serveurStreaming.send(tweet['text'])
    except TwitterSearchException as e: # take care of all those ugly errors if there are some
        print(e)
    serveurStreaming.isConnectionAlive = False
Example #11
    def get(self, keyword=None, lang="en", max=20):
        if (keyword):
            try:
                tso = TwitterSearchOrder()
                tso.set_keywords([keyword])
                tso.set_language(lang)  
                tso.set_include_entities(False) 

                ts = TwitterSearch(
                    Config.settings['twitter']['consumer_key'],
                    Config.settings['twitter']['consumer_secret'],
                    Config.settings['twitter']['access_token'],
                    Config.settings['twitter']['access_token_secret']
                )

                counter = 0 
                sleep_at = max if max is not None else 20 
                sleep_for = 30 

                for tweet in ts.search_tweets_iterable(tso):
                    #print( '@%s tweeted: %s' % ( tweet['user']['screen_name'].text.encode('utf-8'), tweet['text'].text.encode('utf-8') ) )
                    print(tweet)
                    counter += 1 # increase counter
                    if counter >= sleep_at: # it's time to apply the delay
                        counter = 0
                        break
                        #time.sleep(sleep_for) # sleep for n secs

            except TwitterSearchException as e:
                abort(500) 

        return False
Example #12
def pull_tweet_responses(username, tweet_id):
    """
    Queries twitter for tweets mentioning user_id and afer tweet_id
    checks to see if found tweets are in response to tweet_id
    if response and not RT, saves relevant details to SQL database
    :param username:
    :param tweet_id:
    """
    try:
        tso = TwitterSearchOrder()
        tso.set_keywords(['@' + username])
        tso.set_language('en')
        tso.set_since_id(tweet_id)

        ts = TwitterSearch(
                consumer_key=api_key,
                consumer_secret=api_secret,
                access_token=access_token_key,
                access_token_secret=access_token_secret
        )
        for tweet in ts.search_tweets_iterable(tso):
            if tweet['in_reply_to_status_id'] == tweet_id and \
                            tweet['text'][:2] != 'RT':
                write_response_to_mysql(tweet)

    except TwitterSearchException as e:
        print('\nTweet id: ' + str(tweet_id))
        print(e)
Example #13
def printUser(username):
    try:
        tuo = TwitterUserOrder(username) # create a TwitterUserOrder

        ts = TwitterSearch(
            consumer_key = '1kj4GBRevJITV4S40kLXGHVG2',
            consumer_secret = 'c80dJF41IwQV2G4ynR8VYblMQU15M4bc8OFg3aG6l8Y0aoSFhU',
            access_token = '1708110452-e3unR8gR7WRMGDoCh3aZutMPL3bFBLFlqHz8tzy',
            access_token_secret = 'kkiZDDp8KXLB8cRDwsMqBDc5IxqiaVXSmbQ2XtZEij0tl'
        )

        def my_callback_closure(current_ts_instance):
            queries, tweets_seen = current_ts_instance.get_statistics()
            #print queries, tweets_seen
            if queries > 0 and (queries % 5) == 0: # trigger delay every 5th query
                time.sleep(60) # sleep for 60 seconds
        
        i = 0
        # start asking Twitter about the timeline
        for tweet in ts.search_tweets_iterable(tuo, callback=my_callback_closure):
            if i > 50:
                break
            #print tweet['user']['screen_name']
            content = tweet['text'].encode('utf-8')
            print(content)

            #print( '@%s tweeted: %s' % ( tweet['user']['screen_name'], tweet['text'].encode('utf-8') ) )
            i += 1
        return 1

    except TwitterSearchException as e: # catch all those ugly errors
        print(e)
        return 0
Example #14
def perform_search(request):
    """Create a page that counts hashtags"""
    tag_to_search = ""
    if request.method == "POST":
        tag_to_search = request.POST["search"]
    keyword = '"#' + tag_to_search + '"'
    users = []
    postCount = 0
    hashCount = Counter()
    uniqueHashCount = Counter()

    # Now try and talk to twitter
    try:
        tso = TwitterSearchOrder()
        tso.set_keywords([keyword])  # This is the value we search for
        tso.set_include_entities(True)  # This is to include entity information, like Hashtags

        # This is the actual search. Secrets and key's have to be obtained from twitter, and aren't to be shared.
        ts = TwitterSearch(consumer_key="xxx", consumer_secret="yyy", access_token="qqq", access_token_secret="rrr")

        # This iterates through the found tweets
        for tweet in ts.search_tweets_iterable(tso):
            # count each tweet
            postCount += 1
            # Add the organize and record the tweets for later access
            add_tweet(tweet, users)
        # now count them
        count_hashtags(hashCount, uniqueHashCount, users)
        new_id = save_data(keyword.upper(), hashCount, uniqueHashCount, postCount, len(users))
    # catch errors
    except TwitterSearchException as e:  # take care of all those ugly errors if there are some
        return str(e)

    # return that string
    return search(request, new_id)
Example #15
def retrieveTweets(keyword):
    htmlstring = ""  # defined before the try block so the final return cannot hit an undefined name
    try:
        tso = TwitterSearchOrder()
        tso.set_keywords([keyword])
        tso.set_language('en')
        tso.set_include_entities(False)
        ts = TwitterSearch(
            consumer_key = 'dxDoYB875ZUsvgPtp8EVDkyq6',
            consumer_secret = '6v4GiG1B3zKmJOsYPEtb0b39lv9da7iu7pIdAANyIoisoNrtZY',
            access_token = '2157789854-Fwr0uDJQ23twqSyxPEH0VnPwafQvpay8K2z7aFQ',
            access_token_secret = 'q9S6ECBpBv1RMBG8iNT8cYdoJvQAoIMZfMHAivs5Fh0PQ')

        htmlstring = ""
        print "lolpls"

        i = 0
        for tweet in ts.search_tweets_iterable(tso):
            htmlstring += "<div><strong><a href='http://twitter.com/%s'>@%s</a></strong> %s" % (tweet['user']['screen_name'], tweet['user']['screen_name'], tweet['text']) + '</div>'

            i += 1
            if i > 1:
                break

    except TwitterSearchException as e:
        print(e)

    return htmlstring
Example #16
def getTweets(username):
    tFeeds=[]
    try:
        #tuo = TwitterUserOrder(username) # create a TwitterUserOrder
        tso = TwitterSearchOrder() # create a TwitterSearchOrder object
        tso.set_keywords([username])
        tso.set_language('en')
        tso.set_count(50)
        tso.set_include_entities(False)
        tso.set_until(date.today()-timedelta(days=2))

        # it's about time to create TwitterSearch object
        ts = TwitterSearch(
            consumer_key = '%%%',
            consumer_secret = '^^^',
            access_token = '&&&',
            access_token_secret = '@@@'
        )

        # start asking Twitter
        counter=0
        for tweet in ts.search_tweets_iterable(tso):
            if (counter==300):
                break
            tweetx=str(tweet['text'].encode('ascii', 'ignore'))
            counter=counter+1
            tFeeds.append(tweetx)
            
    except TwitterSearchException as e: # catch all those ugly errors
        print(e)
        
    return tFeeds
Example #17
def mainLoop():
    try:
        tso = TwitterSearchOrder() # create a TwitterSearchOrder object
        tso.set_keywords([keyword1]) # let's define all words we would like to have a look for
        tso.set_language('en') # we want to see English tweets only
        tso.set_include_entities(False) # and don't give us all the entity information
    
        # create a TwitterSearch object with our secret tokens
        ts = TwitterSearch(
            consumer_key = consumerKey,
            consumer_secret = consumerSecret,
            access_token = accessToken,
            access_token_secret = accessSecret
         )
        # call the API
        print("Checking for new tweets that match keywords: %s or %s" % (keyword1, keyword2))
        
        for tweet in ts.search_tweets_iterable(tso):
            
            # bind variables to information from tweets we're interested in
            username = (tweet['user']['screen_name']).encode('ascii', 'replace')
            tweetText = (tweet['text']).encode('ascii', 'replace')
            date = (tweet['created_at']).encode('ascii', 'replace')
            
            if not isStringinCSV([username, tweetText, date]): # check whether this tweet is already in our log
                print("New Tweet!")
                
                writeToCSV([username, tweetText, date]) # if it is new, write it to the log
        
        print "Check complete."
        
    except TwitterSearchException as e: # take care of all those ugly errors if there are any
        print(e)
Example #18
def crawl(filename, keywords, language):
    f = codecs.open(filename, "a", "utf-8")
    try:
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.set_keywords(keywords)  # let's define all words we would like to have a look for
        tso.set_language(language)  # filter tweets by the requested language
        tso.set_include_entities(False)  # and don't give us all those entity information

        # it's about time to create a TwitterSearch object with our secret tokens
        ts = TwitterSearch(
            consumer_key='MozbqzFag8UQMbuw9qkuyG7Fm',
            consumer_secret='c4m8EKOwQb90A3nLLySKSEkV7fVXe8taZq4IjgDrMVKihbNW4s',
            access_token='2684788650-VOzUZGhPItlgye6w5LhX5QMevWLK8WTALcxe8KM',
            access_token_secret='9IeW0F8XFnZ7FV5sCyZIahLEZBQTkzwO4L0q3vqRkl4je'
        )

        # this is where the fun actually starts :)
        for tweet in ts.search_tweets_iterable(tso):
            tweet_text = tweet['text'].replace("\n", " ")
            id1 = uuid.uuid4()
            id2 = uuid.uuid4()
            label = random_label()
            f.write('@%s\t%s\t%s\t%s\n' % (id1, id2, label, tweet_text))

    except TwitterSearchException as e:  # take care of all those ugly errors if there are some
        print(e)

    f.close()
Example #19
def get_tweet(token_num, user, clan):
    date = time.strftime('%Y-%m-%d %H:%M:%S')
    ts = TwitterSearch(
        consumer_key = access_tokens[token_num]['consumer_key'],
        consumer_secret = access_tokens[token_num]['consumer_secret'],
        access_token = access_tokens[token_num]['access_token'],
        access_token_secret = access_tokens[token_num]['access_token_secret'],
     )
    tso = TwitterUserOrder(user)
    for tweet in ts.search_tweets_iterable(tso):
        #print('@%s tweeted: %s' % (tweet['user']['screen_name'].encode('utf-8'), tweet['text'].encode('utf-8')))
        cursor.execute('INSERT IGNORE INTO tweets (date, name, clan, tweet) VALUES (%s, %s, %s, %s)', (date, user, clan, json.dumps(tweet),))
Example #20
def HashTracker():
    #ts = TwitterSearch(
    #        consumer_key = 'oiqPmaj6hTVywkKlizDzw50l8',
    #        consumer_secret = 'jHSBDW8E1doOnKKfabdMEnJFIIU0UCz9ufVwvhNhlDNu0Hessq',
    #        access_token = 'boomgoes',
    #        access_token_secret = 'thedynamite')
    try:
        
        tso = TwitterSearchOrder()
        tso.set_keywords(["#lol"])
        tso.set_negative_attitude_filter()
        tso1 = TwitterSearchOrder()
        tso1.set_keywords(["#lol"])
        tso1.set_positive_attitude_filter()

        ts = TwitterSearch(
            consumer_key = 'oiqPmaj6hTVywkKIizDzw50l8',
            consumer_secret = 'jHSBDW8E1doOnKKfabdMEnJFIIU0UCz9ufVwvhNhIDNu0Hessq',
            access_token = '42780587-4kuFLgjRn0sq4EyE1hUqdsXhkPRMt0SwvFfHsv3Dr',
            access_token_secret = 'MuljzOQYyOyaI4A0098vVDZc6xTcBeCfRSi0iUbYDMrDc')

        neg = 0
        pos = 0

        for tweet in ts.search_tweets_iterable(tso):
            neg = neg + 1
        print(neg)

        for tweet in ts.search_tweets_iterable(tso1):
            pos = pos + 1
        print(pos)

    except TwitterSearchException as e:
        print(e)
Example #21
def coleta_tweets(query, max = 3000):
    # receives the search term and the maximum number of tweets per search,
    # and writes the results to a csv file without overwriting existing data
    
    # counter
    i = 0
    # store the search term
    search = query

    # open a csv file named after the search term
    with open(search+'.csv', 'a') as outf: # mode "a" appends, so existing data is not overwritten
        # each new record is added to the end of the file
        writer = csv.writer(outf,delimiter='|') # use | as the field delimiter
        writer.writerow(['user','time','tweet','latitude','longitude'])
        try:
            tso = TwitterSearchOrder()
            tso.set_keywords([search])
            tso.set_language('pt') # Portuguese tweets only
 
            ts = TwitterSearch(
                access_token = '',
                access_token_secret = '',
                consumer_key = '',
                consumer_secret = ''
            )
 
            for tweet in ts.search_tweets_iterable(tso):
                lat = None
                long = None
                time = tweet['created_at'] # UTC-based timestamp
                user = tweet['user']['screen_name']
                tweet_text =  unicodedata.normalize('NFKD',tweet['text'].strip()).encode('ascii', 'ignore')
                #tweet_text =  tweet['text'].strip().encode('ascii', 'ignore')
                tweet_text = ''.join(tweet_text.splitlines())
                print i, time,
                if tweet['geo'] is not None and tweet['geo']['coordinates'][0] != 0.0: # filter out junk coordinates
                    lat = tweet['geo']['coordinates'][0]
                    long = tweet['geo']['coordinates'][1]
                    print '@%s: %s' % (user, tweet_text), lat, long
                else:
                    print('@%s: %s' % (user, tweet_text))
 
                writer.writerow([user, time, tweet_text, lat, long])
                i += 1
                if i > max:
                    return
 
        except TwitterSearchException as e: # just report the error type, if any
            print(e)
Example #22
class TwitterScrape(ScrapeHelper):

    # ----------------------------------------------------------------------------------------------------------------------------------------
    # For Twitter we use the python module TwitterSearch, which is a wrapper around the standard APIs provided by Twitter.
    # We need to initialize the consumer key/secret and the access token/secret. That is what we do here (plus whatever else
    # is required in the future).
    def __init__(self, helperObject):
        # it's about time to create a TwitterSearch object with our secret tokens
        self.ts = TwitterSearch(
            consumer_key=helperObject.moreConfig["apiTokens"]["twitter_consumer_key"],
            consumer_secret=helperObject.moreConfig["apiTokens"]["twitter_consumer_secret"],
            access_token=helperObject.moreConfig["apiTokens"]["twitter_access_token"],
            access_token_secret=helperObject.moreConfig["apiTokens"]["twitter_access_token_secret"],
        )
        self.domain = "twitter"
        self.actualPost = None
        self.postTime = None
        self.resultsFound = False
    # ----------------------------------------------------------------------------------------------------------------------------------------

    # ----------------------------------------------------------------------------------------------------------------------------------------
    # method that actually scrapes Twitter

    def scrapeIt(self, helperObject):
        try:
            tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
            tso.set_keywords(
                [helperObject.currentlySearchingFor]
            )  # let's define all words we would like to have a look for
            tso.set_include_entities(False)  # and don't give us all those entity information

            for tweet in self.ts.search_tweets_iterable(tso):
                # We directly have the link to the tweet itself, so our actualPost param holds just the link.
                # Hitting this link in the browser would take you to the actual tweet itself.
                self.resultsFound = True
                self.actualPost = "https://twitter.com/statuses/" + str(tweet["id"])
                self.postTime = str(tweet["created_at"].encode("utf-8"))
                print "@" + str((tweet["user"]["screen_name"]).encode("utf-8")) + " tweeted " + str(
                    tweet["text"].encode("utf-8")
                ) + "and the time was " + self.postTime + " and the id of the tweet is " + self.actualPost
                helperObject.prepareDbData(self.domain, self.actualPost, self.postTime)

            if not self.resultsFound:
                print "\nBooo...! the lad's missing !"

            print "\n\n"

        except TwitterSearchException as e:  # take care of all those ugly errors if there are some
            print (e)
Example #23
def twitSearch(keywords):
    # create a TwitterSearchOrder object, configure settings
    tso = TwitterSearchOrder() 
    tso.set_keywords(keywords)
    # tso.set_language('en') # filter by language? no, for now
    tso.set_include_entities(True)

    # setup all the secrets
    ts = TwitterSearch(
        consumer_key          = private.consumer_key,    
        consumer_secret       = private.consumer_secret,  
        access_token          = private.access_token,        
        access_token_secret   = private.access_token_secret       
    )

    # do the search, save it for amounts of fun that are not rate limited 
    return ts.search_tweets_iterable(tso)
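As the final comment hints, the returned object is lazy: TwitterSearch pages through the API only as the caller iterates, so rate limits still apply at consumption time. A minimal consumption sketch with hypothetical keywords:

for tweet in twitSearch(['python', 'flask']):  # each page is fetched on demand
    print('@%s tweeted: %s' % (tweet['user']['screen_name'], tweet['text']))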
Example #24
def getTweets(keywords):
	
	# let's set a limit to 1000 tweets
	limit_count = 1000

	# take the input and turn it into an array of strings
	keywords = keywords.split(" ")

	try:
	    tso = TwitterSearchOrder() # create a TwitterSearchOrder object
	    tso.set_keywords(keywords) # let's define all words we would like to have a look for
	    tso.set_language('pt') # we want to see Portuguese tweets only
	    tso.set_count(100) # please dear Mr Twitter, only give us 100 results per page
	    tso.set_include_entities(False) # and don't give us all those entity information
	    # tso.setGeocode(-23.516394,-46.63554,1000,km=True)#I want only posts near Sao Paulo
	    # tso.setUntil(datetime.date(2014, 01, 26))

	    ts = TwitterSearch(
			consumer_key = 'VqUEyEu7lXO5z5lWqoTkYOUmZ',
			consumer_secret = 'aiGUuiCwWNrYherJ5USuPkeZi3WMdCJj0ZsIGtWSHlZgPeqpmI',
			access_token = '490611801-3ygNslO3ZvKXsGm1wZA1AdIwKO858jAsa66orMbd',
			access_token_secret = 'NZKkVwOYnz5BR1rf34PLHuODDnYlIaf52fbNBcMuUvU7b'
		)

	    for tweet in ts.search_tweets_iterable(tso): # this is where the fun actually starts :)
	        print('at %s %s' % (tweet['created_at'], tweet['text']))


	        date = tweet['created_at']
	        info = tweet['text'] + " " + date
	        data2Save = unicodedata.normalize('NFKD', info).encode('utf-8','ignore')

	        saveFile = open('tweets.txt', 'a')
	        saveFile.write(data2Save)
	        saveFile.write('\n')
	        saveFile.close()

	        limit_count -= 1
	        
	        if limit_count == 0:
	            break


	except TwitterSearchException as e: # take care of all those ugly errors if there are some
	    print(e)
Example #25
def Tweety():    
    try:
        Twitter_User = TwitterUserOrder('EmreOvunc')        
        Twitter_Obj = TwitterSearch(
            consumer_key = 'XX',
            consumer_secret = 'XX',
            access_token = 'XX',
            access_token_secret = 'XX'
         )
        for tweet in Twitter_Obj.search_tweets_iterable(Twitter_User):
            tweets=( '@%s : %s \n' % (tweet['user']['screen_name'], tweet['text']) )
            
            # You can use an if statement for specific tweets
            # or skip this part to save all tweets
            if '#TweetyBot' in tweets:
                if CheckTweets(tweets[13:]) == 1:
                    break
    except TwitterSearchException as error:
        print(error)
Example #26
def collectTweets(file,keywords,collection,numTweets):
    header = False
    if os.path.exists(file):
        header = True

    #open a csv file for writing
    if "Test" in file:
        csvfile= open(file, 'wb')
    else:
        csvfile= open(file, 'ab')

    writer = writeHeader(csvfile,header)
    names = keywords.split()
    entity =[]
    for keyword in names:
        entity.append(keyword.lower())
    try:
        tso = TwitterSearchOrder() # create a TwitterSearchOrder object
        tso.set_keywords(entity)
        tso.set_language('en') 
        tso.set_include_entities(False) # don't give all the entity information

        # create a TwitterSearch object with our secret tokens
        ts = TwitterSearch(
            consumer_key = '',
            consumer_secret = '',
            access_token = '',
            access_token_secret = ''
         )
        global count 
        for tweet in ts.search_tweets_iterable(tso):
            tweetText = tweet['text']
            tweetText = removeHttps(tweetText.encode('ascii','ignore'))
            insertIntoDB(collection,tweetText,writer,keywords)
            if count == numTweets:
                break
    
        print("Total tweets collected for "+keywords+ " are "+ str(count))
    except TwitterSearchException as e: # handle errors if there are some
        print(e)

    finally:
        csvfile.close()
Example #27
def tweetFetch(keywords):
	filtered_tweets=[]
	try:
	    print "fetching from Twitter"
	    tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
	    tso.set_keywords(keywords)  # let's define all words we would like to have a look for
	    tso.set_count(10)
	    # tso.set_result_type("popular")
	    tso.set_language('en')  # we want to see English tweets only
	    tso.set_include_entities(False)  # and don't give us all those entity information

	    # it's about time to create a TwitterSearch object with our secret tokens

	    ts = TwitterSearch(consumer_key='GML650EJDB9kGTmLOV1fmiKnE',
	                       consumer_secret='Lssnyk1dDmPYS86USSCjD8SBV3HAJPjUJQNemAhowv6jCwIVez'
	                       ,
	                       access_token='3223420976-qydbZ9X79cLsKhUVdqTBOs7Rg9sh3sTjUNzuc3B'
	                       ,
	                       access_token_secret='bs4LDtdKkkCTpTZyqm49hSF1o2igfFkw2eSHvYbLigAsM'
	                       )

	     # this is where the fun actually starts :)
	    count=0
	    tweet_limit=9 #limit to first 10 results

	    for tweet in ts.search_tweets_iterable(tso):
	        # print tweet['text']
	        sent_resp=sentimentAnalysis(tweet['text'])
	        count+=1
	        tweet_object={}
	        tweet_object['post']=tweet['text']
	        if 'score_tag' in sent_resp:
	        	tweet_object['score_tag']=sent_resp['score_tag']
	        if(tweet_object not in filtered_tweets):
	        	filtered_tweets.append(tweet_object)
	        print(count)
	        if(count>tweet_limit):
	        	break

	except TwitterSearchException as e:

	    print(e)  # take care of all those ugly errors if there are some
Example #28
def search(searchTerm,lang=LANGUAGE):
    try:
        tso = TwitterSearchOrder() # create a TwitterSearchOrder object
        tso.set_keywords([searchTerm]) # let's define all words we would like to have a look for
        tso.set_language(lang) # filter tweets by the requested language
        tso.set_include_entities(True) 
    
        # it's about time to create a TwitterSearch object with our secret tokens
        ts = TwitterSearch(
            consumer_key = TWIT_CONSUMER_KEY,
            consumer_secret = TWIT_CONSUMER_SECRET,
            access_token = TWIT_ACCESS_TOKEN,
            access_token_secret = TWIT_ACCESS_SECRET
        )
    
        return ts.search_tweets_iterable(tso)
    
    except TwitterSearchException as e: # take care of all those ugly errors if there are some
        print (e)
        return -1
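Since the function returns -1 on failure and a lazy iterable otherwise, callers should check for the sentinel before looping; a minimal sketch (the search term is hypothetical):

results = search('python')
if results != -1:
    for tweet in results:
        print(tweet['text'])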
Example #29
    def get(self, crowd_request, data, **kwargs):
        tweet_id = None  # fallback in case the search returns nothing
        try:
            # import ipdb; ipdb.set_trace()
            tso = TwitterSearchOrder() # create a TwitterSearchOrder object
            tso.set_keywords(['earthquake']) # let's define all words we would like to have a look for
            tso.set_include_entities(False) # and don't give us all those entity information
            ts = TwitterSearch(
                consumer_key = 'O92NoyCEQsUq7swRKg',
                consumer_secret = 'dD7NP6ZTOv9KX28Iw6O9gtgu5MpbzTG5qyfdd7S99Y',
                access_token = '46171206-lfcESnE0WfZ8iCb4QEfreOco3PuLodM0p2lp3gC9s',
                access_token_secret = 'CURU7xIS2InzDHF5LtPpZ8gLXjWg0M3okMbmCHrIWdI'
            )

            for tweet in ts.search_tweets_iterable(tso):
                print( '@%s tweeted: %s' % ( tweet['user']['screen_name'], tweet['text'] ) )
                tweet_id = tweet["id"]
                break
        except TwitterSearchException as e: # take care of all those ugly errors if there are some
            print(e)
        return {"status": "OK", "tweet_id": tweet_id, "template": "pick_tweet_hashtags.html"}
Example #30
def getTweets(keywords):
    data=[]
    try:
        ts = TwitterSearch(
              consumer_key = environ['CK'],
              consumer_secret = environ['CS'],
              access_token = environ['AT'],
              access_token_secret = environ['ATS']
           )
        tso = TwitterSearchOrder()
        tso.set_keywords(keywords, or_operator=True)
        for tweet in ts.search_tweets_iterable(tso):

            created = time.strftime('%m-%d-%y %H:%M', time.strptime(tweet['created_at'],'%a %b %d %H:%M:%S +0000 %Y'))  # new name; reusing ts would shadow the TwitterSearch client

            data.append({
                'date': created,
                'text': tweet['text'],
                'avatar': tweet['user']['profile_image_url_https'],
                'user': tweet['user']['screen_name'],
                'id': tweet['id'],
                'rt': tweet['retweet_count']
            })

        return(data)

    except TwitterSearchException as e:
        print(e)
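Unlike most examples here, this one reads its credentials from environment variables. A minimal setup sketch (the values are placeholders, not real tokens):

import os
os.environ['CK'] = 'your-consumer-key'          # placeholder
os.environ['CS'] = 'your-consumer-secret'       # placeholder
os.environ['AT'] = 'your-access-token'          # placeholder
os.environ['ATS'] = 'your-access-token-secret'  # placeholder

data = getTweets(['python'])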
Example #31
     sarcastic_tso.set_include_entities(True)
     sarcastic_tso.arguments.update({"tweet_mode": "extended"})
 if args.serious_path:
     serious_tso = TwitterSearchOrder()
     serious_tso.set_keywords(["-#sarcasm"])  # query tweets w/o #sarcasm
     serious_tso.set_language('en')
     serious_tso.set_include_entities(True)
     serious_tso.arguments.update({"tweet_mode": "extended"})
 # query twitter API and populate tweet lists
 try:
     ts = TwitterSearch(consumer_key=CONSUMER_KEY,
                        consumer_secret=CONSUMER_SECRET,
                        access_token=ACCESS_TOKEN,
                        access_token_secret=ACCESS_SECRET)
     if args.sarcastic_path:
         for sarcastic_tweet in ts.search_tweets_iterable(sarcastic_tso):
             if not sarcastic_tweet['full_text'].lower().startswith('rt'):
                 sarcastic_tweets.append({
                     'id':
                     sarcastic_tweet['id'],
                     'urls':
                     not not sarcastic_tweet['entities']['urls'],
                     'media':
                     "media" in sarcastic_tweet["entities"],
                     'text':
                     sarcastic_tweet['full_text']
                 })
     if args.serious_path:
         for serious_tweet in ts.search_tweets_iterable(serious_tso):
             if not serious_tweet['full_text'].lower().startswith('rt'):
                 serious_tweets.append({
Example #32
def convert(schoolName,key1,key2,key3,key4):
    #importing the databases of campuses & locations

    campus_loc = {} #{name:[long:lat]}
    with open('campuses.csv') as f:
        reader = csv.reader(f)
        reader.next()
        for row in reader:
            campus_loc[row[1]] = [float(row[3]), float(row[4])]

    school = schoolName # take the input and rename it for convenience's sake, since every other variable name is schoolName #whoops

    """
    counter = 0 #this entire block will never be run unless we take user input
    while school not in campus_loc.keys():
            counter += 1
            print "Sorry! Doesn't look like we have that school. Try again?"
            if counter > 5:
                    print "Wow, you really suck at this."
            school = raw_input("> ")
    """
    location = campus_loc[school] #gets the location from campus_loc using the school name

    ############## begin Twitter API call now

    collegeTweets = [] # defined before the try block so the emptiness check below cannot hit an undefined name
    try:
        tso = TwitterSearchOrder() # create a TwitterSearchOrder object
        tso.set_keywords([' ']) # let's define all words we would like to have a look for
        tso.set_geocode(location[1],location[0],1)
        #tso.set_language('de') # we want to see German tweets only
        tso.set_include_entities(False) # and don't give us all those entity information

        # it's about time to create a TwitterSearch object with our secret tokens
        ts = TwitterSearch(
            consumer_key = str(key1),
            consumer_secret = str(key2),
            access_token = str(key3),
            access_token_secret = str(key4)
            #consumer_key = 'ZrOjYYBF9ALwFlHjnJ6uRwBk6',
            #consumer_secret = 'QraDsU5pdeSTT7qNcReCJZYwmX94Q6S7yb0EcagPumCNmafufq',
            #access_token = '16986893-nUTUdtEcf2HffYJYpVJYbR2p85EeSmvvZSD2VsCLS',
            #access_token_secret = 'I8V9DJNFWQX0wh0gNbfLT2bAWpj4uS7rtUioFqyDjnVf3'
           # consumer_key = 'naPGQJt3L75sZiEZAplsETEp1',
        #   consumer_secret = 'rqyRlly80nw1XUGxt7ySR2ZUvQiqDKpbj8kEgDXJ63U1AWbvAr',
           # access_token = '16986893-Zq2L75kDTGR0l3AnsSN1ZXkzw4o8NuBssLCyvxkES',
           # access_token_secret = 'woWPf0kgYoJcz6w4mNUBc8tt2bWqwdSCoMPEYXsWU9Y6w'
         )

        # this is where the fun actually starts :)
        counter = 0
        for tweet in ts.search_tweets_iterable(tso):
            if counter < 50:
                collegeTweets.append(tweet['text'].encode('ascii','ignore')) #ignores any ASCII
                collegeTweets[-1] = string.replace(collegeTweets[-1],"#","") #pulls out pound signs to make hashtags part of the sentence/string
                collegeTweets[-1] = string.replace(collegeTweets[-1],"\n","")  #pulls out any returns which f**k up the sentence
                collegeTweets[-1] = string.replace(collegeTweets[-1],"@","") #pulls out any mentions
                collegeTweets[-1] = remove_http(collegeTweets[-1],"http") #pulls out any links (aka to pictures since the majority of these are linked from Instagram)
                counter += 1
                collegeTweets[-1] += "a"

    except TwitterSearchException as e:
        print(e)    

    ############### begin Indico API call now

    if not collegeTweets:
        return 0.5

    indicoio.config.api_key = "f09f509655f721e3adac6df5b35abfed"
    api_key_Lisa = "f09f509655f721e3adac6df5b35abfed"

    sentementCollegeTweets = sentiment(collegeTweets) #take the Twitter strings and put them through indico, since we need an array of the indico results

    average = 0.0
    for i in sentementCollegeTweets:
        average += i
    average = average/len(sentementCollegeTweets)

    print average
Example #33
                   access_token='',
                   access_token_secret='')  # create twitter search token

SelfieTweets = pickle.load(open("SelfieTweets", "r"))
usercount = 0
for selfietweet in SelfieTweets[0:len(SelfieTweets)]:
    usercount += 1
    print usercount
    # setting user to search:
    user = int(selfietweet["UserID"])
    print user
    try:
        tuo = TwitterUserOrder(user)  # create a TwitterSearchOrder object
        tweets = []
        count = 0
        for tweet in ts.search_tweets_iterable(tuo):  #search tweets
            count += 1
            try:
                TweetID = tweet["id_str"]
                UserID = str(tweet["user"]["id_str"])  #collecting data
                text = tweet['text']

                #appending tweet to tweets array
                data = {
                    "UserID": UserID,
                    "TweetID": TweetID,
                    "text": text,
                    "Json": tweet
                }
                tweets.append(data)
Example #34
def _do_ocr_and_lookup(img_obj):
    limit_of_tweets = int(args.limit)
    # Replace line breaks with a space and split text into an array
    text = pytesseract.image_to_string(img_obj,
                                       lang='eng').replace('\n',
                                                           ' ').split(' ')
    potential_user = None
    for element in text:
        if element and element[0] == '@':
            # Since handles cannot have spaces, strip until space
            potential_user = element.split(' ')[0]
            break
    if potential_user is None:
        print('No @handle found in the OCR text')
        return

    config = configparser.RawConfigParser()
    config.readfp(open('twitter.config'))

    # Just in case, the dude/dudette using the program puts in ' or " in the config.
    consumer_key = config.get('twitter',
                              'consumer_key').replace('\'',
                                                      '').replace('\"', '')
    consumer_secret = config.get('twitter', 'consumer_secret').replace(
        '\'', '').replace('\"', '')
    access_token = config.get('twitter',
                              'access_token').replace('\'',
                                                      '').replace('\"', '')
    access_token_secret = config.get('twitter', 'access_token_secret').replace(
        '\'', '').replace('\"', '')

    try:
        tuo = TwitterUserOrder(potential_user[1:])
        ts = TwitterSearch(consumer_key=consumer_key,
                           consumer_secret=consumer_secret,
                           access_token=access_token,
                           access_token_secret=access_token_secret)
        tweets = []
        for tweet in ts.search_tweets_iterable(tuo):
            # Nobody cares about re-tweets
            if 'RT ' not in tweet['text']:
                if tweet not in tweets:
                    tweets.append((tweet['text'], tweet['id']))
                if not limit_of_tweets:
                    break
                else:
                    limit_of_tweets -= 1
        body = text[text.index('') + 1:]
        try:
            stripped_body = body[:body.index('')]
        except ValueError:  # no second '' marker in body
            stripped_body = body

        for tweet in tweets:
            removed_elements = 0
            ltweet, orig_len = tweet[0].split(' '), len(tweet[0].split(' '))
            for ele in stripped_body:
                if ele in ltweet:
                    removed_elements += 1
                    ltweet.remove(ele)
            removal_rate = (removed_elements / float(orig_len)) * 100
            if removal_rate > 75.0:
                print("*** Tweet is probably real! ***")
                print("-> Confidence : " + str(removal_rate))
                print("-> URL : https://twitter.com/" + potential_user[1:] +
                      "/status/" + str(tweet[1]))

    except TwitterSearchException as e:  # catch all those ugly errors
        print(e)
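A hedged invocation sketch for the function above; it assumes a PIL image (which is what pytesseract.image_to_string takes) and a made-up file name:

from PIL import Image

_do_ocr_and_lookup(Image.open('suspect_tweet_screenshot.png'))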
Example #35
tso.set_language('en')
tso.set_include_entities(False)

userTweets = {}


ts = TwitterSearch(consumer_key = '[your consumer key]',\
                 consumer_secret = '[your consumer secret]',\
                 access_token = '[access token]',\
                 access_token_secret = '[access token secret]'\
        )

outfile = open('tweetfile.txt', 'w')
outfile.write('User\t\t\tTweets')
outfile.write('\n.........................................................')

for tweet in ts.search_tweets_iterable(tso):
    username = tweet['user']['name'] + '(@' + tweet['user']['screen_name'] + ')'
    if username in userTweets.keys():
        tweetlist = userTweets[username]
        tweetlist.append(tweet['text'].encode('utf8'))
        userTweets[username] = tweetlist
    else:
        userTweets[username] = [tweet['text'].encode('utf8')]

for usrNm, tweets in userTweets.iteritems():
    for tweet in tweets:
        outfile.write("\n" + usrNm.encode('utf8') + "\t\t" + tweet + "\n")

outfile.close()
Example #36
def gettweets(hashtag):
    if not os.path.isdir(".cache/"):
        os.system("mkdir .cache")
        print("Cache folder was created!")
    datei = '.cache/{}.json'.format(hashtag)
    if (os.path.isfile(datei) and chachetime(datei)):  # if the cache exists
        with open(datei) as cachefile:  # read the cache
            print("Output from cache " + datei)
            print("Cache was last refreshed " + str(round(daysDiff / 3600, 2)) +
                  " hour(s) ago.")
            return json.loads(cachefile.read())  # return the cached data

    print("Cache {} is being rebuilt".format(datei))
    tso = TwitterSearchOrder()  # create the Twitter search object
    tso.set_keywords([hashtag])  # we search for a hashtag
    tso.set_language('de')  # German tweets only
    tso.set_include_entities(False)  # don't return entity data

    Config = configparser.ConfigParser()
    configfiles = ['config.ini', 'config2.ini', 'config3.ini']
    configfile = random.choice(configfiles)
    print("Verwende " + configfile)
    if os.path.isfile(configfile):
        Config.read(configfile)
    else:
        print(
            "The config file does not exist, please create a new config with the example file"
        )
        sys.exit()

    consumer_key = Config.get("Twitter API", "consumer_key")
    consumer_secret = Config.get("Twitter API", "consumer_secret")
    access_token = Config.get("Twitter API", "access_token")
    access_token_secret = Config.get("Twitter API", "access_token_secret")

    # create the object with the access credentials
    ts = TwitterSearch(consumer_key=consumer_key,
                       consumer_secret=consumer_secret,
                       access_token=access_token,
                       access_token_secret=access_token_secret)

    tweets = set()
    counter = 0
    for tweet in ts.search_tweets_iterable(tso):
        if (counter <= 100):
            counter += 1
            tweets.add(tweet['text'])
        else:
            break

    tweets = list(tweets)

    with open(datei, 'w') as cachefile:
        tweetsasjson = json.dumps(tweets)
        cachefile.write(tweetsasjson)

    if (datei == "jugendhackt.json"):
        # os.system("sudo cp /home/pi/Hashdistribution/backend/.cache/jugendhackt.json /var/www/html/data/jh.json")
        print("Startseiten Tweets aktualisiert!")
    return tweets
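Repeat calls within the cache window are then served from disk rather than the API; a minimal usage sketch with a hypothetical hashtag:

tweets = gettweets('jugendhackt')
print('%d tweets' % len(tweets))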
Example #37
def find_polarity(topic):
    try:
        tso = TwitterSearchOrder()
        tso.set_keywords([topic])
        tso.set_language('en')
        ts = TwitterSearch(
            consumer_key='JUBWToPuyPfmzg8n117ZTllfB',
            consumer_secret='lt0Psg46Nqzzaa4uel3wtSbaOyh9WiYIqx6ZH5xaExthndrsc1',
            access_token='1172272055183728640-nLQg9fvsLVieB9BXSsJq86a6kMmR8p',
            access_token_secret='5ogC7PXA1nmlNd5FCYtNaSIhF7tyA5K7CZzNBhi8qIhv1'
        )

        # lists that will become columns in our data frame
        user_list = []
        clean_list = []
        favorited_list = []
        retweet_list = []
        its_a_retweet = []

        count = 0

        for tweet in ts.search_tweets_iterable(tso):
            if count == 50:
                break
            tw_obj = api.get_status(
                tweet['id_str']
            )  # need the twitter object to check if it's a retweet
            if not hasattr(tw_obj, 'retweeted_status'):
                user_list.append(tweet['user']['screen_name'])  # user
                cleaned = clean_text(
                    tweet['text']
                )  # removing unnecessary symbols from the tweet's string
                clean_list.append(cleaned)
                favorited_list.append(
                    tweet['favorite_count'])  # num of favorites
                retweet_list.append(tweet['retweet_count'])  # num of retweets
                its_a_retweet.append(hasattr(tw_obj, 'retweeted_status'))
                count += 1

        # creating the data frame
        # To make a column, you need a list
        df = pd.DataFrame(user_list)
        df['Tweet Text'] = clean_list
        df['Number of Favorites'] = favorited_list
        df['Number of Retweets'] = retweet_list
        df['Polarity'] = df['Tweet Text'].apply(Polarity)
        # df['Retweet?'] = its_a_retweet

        # print(df)

        # sort data frame by polarity
        sortedDF = df.sort_values(by=['Polarity'],
                                  ignore_index=True,
                                  ascending=False)

        # sort data frame by favorites
        sortedFavDF = df.sort_values(by=['Number of Favorites'],
                                     ignore_index=True,
                                     ascending=False)

        # gathering polarity data from df
        pol_count = 0.0
        for i in df.index:
            pol_count += df['Polarity'][i] * df['Number of Favorites'][i] + df[
                'Polarity'][i] * 2.0 * df['Number of Retweets'][i]
            pol_count += df['Polarity'][i]
        avg_pol = pol_count

        print("Average Polarity: ", avg_pol)
        print("Overall Rating: ", rating(avg_pol), '\n')
        label['text'] = "Average Polarity: " + str(
            avg_pol) + '\n' + "Overall Rating: " + rating(avg_pol) + '\n'

        # 5 Most Popular Tweets
        print("5 most popular tweets: ")
        label['text'] += '\n' + "5 most popular tweets: " + '\n'
        for i in range(0, 5):
            label['text'] += str(
                i + 1) + ") " + sortedFavDF['Tweet Text'][i] + '\n'
            print(str(i + 1) + ") " + sortedFavDF['Tweet Text'][i])

        # 5 Most Positive Tweets
        print("\n5 Most Positive Tweets: ")
        label['text'] += '\n' + "5 most positive tweets: " + '\n'
        for i in range(0, 5):
            label['text'] += str(i +
                                 1) + ") " + sortedDF['Tweet Text'][i] + '\n'
            print(str(i + 1) + ") " + sortedDF['Tweet Text'][i])

        print("\n5 Most Negative Tweets: ")
        label['text'] += '\n' + "5 most negative tweets: " + '\n'
        j = 1
        for i in range(df['Tweet Text'].size, df['Tweet Text'].size - 5, -1):
            label['text'] += str(j) + ") " + sortedDF['Tweet Text'][i -
                                                                    1] + '\n'
            print(str(j) + ") " + sortedDF['Tweet Text'][i - 1])
            j += 1

    except TwitterSearchException as e:
        print(e)
Example #38
def get_all_tweets(screen_name):
    # Twitter only allows access to a users most recent 3240 tweets with this method

    # authorize twitter, initialize tweepy
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth)

    # initialize a list to hold all the tweepy Tweets
    alltweets = []

    # make initial request for most recent tweets (200 is the maximum allowed count)
    new_tweets = api.user_timeline(screen_name=screen_name, count=10)

    # save most recent tweets
    alltweets.extend(new_tweets)

    # save the id of the oldest tweet less one
    oldest = alltweets[-1].id - 1

    # keep grabbing tweets until there are no tweets left to grab
    while len(new_tweets) > 0:

        # all subsequent requests use the max_id param to prevent duplicates
        new_tweets = api.user_timeline(screen_name=screen_name,
                                       count=10,
                                       max_id=oldest)

        # save most recent tweets
        alltweets.extend(new_tweets)

        # update the id of the oldest tweet less one
        oldest = alltweets[-1].id - 1
        if (len(alltweets) > 15):
            break
        print("...%s tweets downloaded so far" % (len(alltweets)))

    # write tweet objects to JSON
    file = open('tweet.json', 'w')
    print("Writing tweet objects to JSON please wait...")
    for status in alltweets:
        json.dump(status._json, file, sort_keys=True, indent=4)

    # close the file
    print("Done")
    file.close()

    for tweet in tweepy.Cursor(api.search, q='BBC').items(20):
        print('Tweet by: @' + tweet.user.screen_name)

    # key word for search in BBC new
    key_word = input("type in the keyword in BBC news to search: \n")

    try:
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.set_keywords([
            'BBC News', key_word
        ])  # let's define all words we would like to have a look for
        tso.set_language('en')  #only English tweets
        tso.set_include_entities(
            False)  # and don't give us all those entity information

        # exclude the retweet!!!

        # it's about time to create a TwitterSearch object with our secret tokens
        ts = TwitterSearch(
            # Twitter API credentials
            # warning!!!!
            # Before uploading to GitHub, replace them!!!
            consumer_key="",
            consumer_secret="",
            access_token="",
            access_token_secret="")

        file = open('TweetOutput.txt', 'w+')
        print("Writing tweet objects to txt, please wait...")

        number = 0
        # record for number of tweets we have
        # this is where the fun actually starts :)
        for tweet in ts.search_tweets_iterable(tso):
            if number <= 100:
                file.write('@%s tweeted: %s\n' %
                           (tweet['user']['screen_name'], tweet['text']))
                number += 1
            else:
                file.write("We now have 100 tweets!")
                print("We now have 100 tweets!")
                break

        print("Done")
        file.close()

    except TwitterSearchException as e:  # take care of all those ugly errors if there are some
        print(e)
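The max_id pagination pattern used in get_all_tweets above can be factored into a reusable helper. A minimal sketch, assuming a configured tweepy API object; fetch_all_user_tweets and its parameters are hypothetical names:

def fetch_all_user_tweets(api, screen_name, page_size=200, cap=3200):
    # walk the timeline backwards: each request asks only for tweets older
    # than the oldest one seen so far (max_id is inclusive, hence the -1)
    tweets = api.user_timeline(screen_name=screen_name, count=page_size)
    all_tweets = list(tweets)
    while tweets and len(all_tweets) < cap:
        oldest = all_tweets[-1].id - 1
        tweets = api.user_timeline(screen_name=screen_name,
                                   count=page_size,
                                   max_id=oldest)
        all_tweets.extend(tweets)
    return all_tweets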
def main(accountName, key1, key2, key3, numberTweets):
    try:
        tso = TwitterSearchOrder() # create a TwitterSearchOrder object
        tuo = TwitterUserOrder(accountName) # create a TwitterUserOrder
        # use parameters instead of fixed string
        tso.set_keywords([key1,key2, key3]) # let's define all words we would like to have a look for
        #tso.set_language('en') # we want to see English tweets only
        tso.set_include_entities(False) # and don't give us all those entity information

        # optional: limit the range of time, no need here
        # most recent tweets 
        # 8/27/2019 is the date burgers come out


        # it's about time to create a TwitterSearch object with our secret tokens
        ts = TwitterSearch(
            #Twitter API credentials
            #warning: replace the real keys with placeholders before uploading to GitHub!
            consumer_key = "M",
            consumer_secret = "M",
            access_token = "M",
            access_token_secret = "Mine"
        )

        #write tweet objects to txt/Json file
        file = open('searchTweetOutput.txt', 'w+') 
        print ("Writing tweet objects to txt, please wait...")
        num = 0  # record for the number of tweets we have
        numberTweet = int(numberTweets)
        # this is where the fun actually starts :)
        for tweet in ts.search_tweets_iterable(tso):
            if num < numberTweet:
                # exclude the retweet
                if (not tweet['retweeted']) and ('RT @' not in tweet['text']):
                    #file.write('@%s tweeted: %s' % ( tweet['user']['screen_name'], tweet['text'] ))
                    file.write('%s tweeted: %s . This tweet ends. \n' % ( tweet['user']['screen_name'], tweet['text'] ))
                    num += 1
            else:
                result = str(numberTweets) + " tweets we have now."
                file.write(result)
                print(result)
                break

    
        #close the file
        print ("Done")
        file.close()
    
        """
        num = 0; #record for number of tweets we have
        # this is where the fun actually starts :)
        for tweet in ts.search_tweets_iterable(tso):
            if num <= 100:
                print( '@%s tweeted: %s' % ( tweet['user']['screen_name'], tweet['text'] ) )
                num += 1
            else:
                print("100 tweets we have now")
                break
        """

    except TwitterSearchException as e:
        print(e)
Ejemplo n.º 40
0
def _do_ocr_and_lookup(img_obj):
    config = get_config()
    limit_of_tweets = int(args.limit)
    potential_user = '__fakemenot__'  # sentinel value, checked below after OCR
    # Replace line breaks with a space and split text into an array
    text = pytesseract.image_to_string(
        img_obj, lang='eng').replace(
        '\n', ' ').split(' ')
    for element in text:
        if element and element[0] == '@':
            print("Detected handle : " + str(element))
            # Since handles cannot have spaces, strip until space
            potential_user = element.split(' ')[0]
            break

    # Just in case the person using the program puts in ' or " in the config.
    consumer_key = config.get(
        'twitter',
        'consumer_key').replace(
            '\'','').replace(
                '\"','')
    consumer_secret = config.get(
        'twitter',
        'consumer_secret').replace(
            '\'','').replace(
                '\"','')
    access_token = config.get(
        'twitter',
        'access_token').replace(
            '\'','').replace(
                '\"','')
    access_token_secret = config.get(
        'twitter',
        'access_token_secret').replace(
            '\'','').replace(
                '\"','')

    if potential_user == '__fakemenot__':
        print(colored("[*] It looks like OCR failed. Please make sure you " +
            "crop the image as in sample and is readable.", 'red'))
        exit(1)

    try:
        tuo = TwitterUserOrder(potential_user[1:])
        ts = TwitterSearch(
            consumer_key=consumer_key,
            consumer_secret=consumer_secret,
            access_token=access_token,
            access_token_secret=access_token_secret
        )
        tweets = []
        body = '__awesomebody__'
        for tweet in ts.search_tweets_iterable(tuo):
            # Nobody cares about re-tweets
            if 'RT ' not in tweet['text']:
                # compare the (text, id) tuple; the raw tweet dict would
                # never match the tuples stored in the list
                if (tweet['text'], tweet['id']) not in tweets:
                    tweets.append((tweet['text'], tweet['id']))
                if not limit_of_tweets:
                    break
                else:
                    limit_of_tweets -= 1

        # The most probable tweet body is the OCR text after the element
        # read as 'V' (likely how OCR renders the verified badge).
        try:
            body = text[text.index('V') + 1:]
        except ValueError:
            body = text

        # If none of that was found, let's report an OCR error
        if body == '__awesomebody__':
            print(colored("[*] It looks like OCR failed.Please make sure you " +
                          "crop image as in sample and is readable.", 'red'))

        found_tweet = False
        # Check against every tweet pulled
        for tweet in tweets:
            removed_elements = 0
            ltweet, orig_len = tweet[0].split(' '), len(tweet[0].split(' '))
            # Compare each word of the OCR body against the tweet's words (see the standalone sketch after this example). TODO: Optimize
            for ele in body:
                if ele in ltweet:
                    removed_elements += 1
                    ltweet.remove(ele)
            removal_rate = (removed_elements / float(orig_len)) * 100

            if int(removal_rate) > 75:
                found_tweet = True
                print(colored("[*] It looks like this is a valid tweet",
                              'green'))
                print(colored("-> Confidence : " + "%.2f" % removal_rate + "%",
                              'green'))
                print(colored("-> Potential URL : https://twitter.com/" +
                              potential_user[1:] +
                              "/status/" + str(tweet[1]), 'green'))

            elif 55 <= int(removal_rate) <= 75:  # range check; the original membership test matched only exactly 55 or 75
                found_tweet = True
                print(colored("[*] This might be a valid tweet", 'yellow'))
                print(colored("-> Confidence : " + "%.2f" % removal_rate + "%",
                              'yellow'))
                print(colored("-> Potential URL : https://twitter.com/" +
                              potential_user[1:] +
                              "/status/" + str(tweet[1]), 'yellow'))

        if not found_tweet:
            print(colored("[*] I couldn't find a tweet like that. " +
                          "Try increasing the limit to pull more tweets",
                          'yellow'))

    except TwitterSearchException as e:  # catch all those ugly errors
        print(e)
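The word-overlap confidence scoring used above can be isolated into a standalone function, which makes it easier to test. A minimal sketch of the same logic; overlap_confidence is a hypothetical name:

def overlap_confidence(tweet_text, body_words):
    # percentage of the tweet's words that also occur in the OCR body;
    # matched words are consumed so repeated words are counted fairly
    remaining = tweet_text.split(' ')
    orig_len = len(remaining)
    removed = 0
    for word in body_words:
        if word in remaining:
            remaining.remove(word)
            removed += 1
    return (removed / float(orig_len)) * 100

For example, overlap_confidence("just setting up my twttr", ["setting", "up", "my", "twttr"]) returns 80.0, which the logic above would report as a valid tweet.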
Ejemplo n.º 41
0
def getTweets(query):
    #pp = pprint.PrettyPrinter(indent=4)

    try:
        if not sample:
            tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
            print(query)
            tso.set_keywords([
                query
            ])  # let's define all words we would like to have a look for
            tso.set_language('en')  # we want to see English tweets only
            tso.set_include_entities(
                False
            )  # and don't give us all those entity information (carried over from the original code; purpose unclear)
            #my API data.
            ts = TwitterSearch(
                consumer_key='SwtLcZe9Im6q998K4cJqANs4n',
                consumer_secret=
                '7PMRM3ec7ltINPVl72FXurMn8Qg9HrS1NKwocYJVlTGngEFbEA',
                access_token=
                '51466054-cJUBESD4H9THIQExiKQ1HOGdR0GflXdyeIeL0TfKw',
                access_token_secret=
                'nn3ESWtluVoLSNFexAKcEesF6rEg0lTJ4QaIbFHJACFDr')

        count = 1000  #how many tweets we want to see. we want as many as possible, but do not want to sacrifice load time too much
        i = 0
        tweet_list = []
        if sample:
            print("Reading Sample File")
            for line in file.read().split('\n'):
                tweet_list.append(line)
        else:
            print("Searching....")
            for tweet in ts.search_tweets_iterable(tso):

                if i >= count:
                    break  #stops getting tweets when we have enough

                #keep this line below as a reference. from the original code:
                #print( '@%s tweeted: %s' % ( tweet['user']['screen_name'], tweet['text'] ) )

                words = tweet['text']
                # strip a leading "RT @user: " prefix if present; note that
                # str.lstrip() strips characters, not a prefix, so slice instead
                start = re.search(r"(((RT )?@(\w)*) ?:? )?", words)
                words = words[len(start.group(0)):]
                tweet_list.append(words)
                i += 1
            # if we have less than 1000 tweets, the corpus is too short.
            if (len(tweet_list) < 1000):
                print(
                    "Sorry! Your search did not return enough results, please try another."
                )
                return
            print("Search complete!")
        print("Tagging...")

        tagged = CMU.runtagger_parse(sent_tokenize(
            "\n".join(tweet_list)))  #tweetset))
        print("Tagging complete!")
        print("Analyzing tags...")

        tag_table = Process.create_rules(tagged)
        syl_rules = Process.get_pos_syllables(tagged)
        rhyme_pos_table = SCD.rhyme_to_POS(tagged)
        print("Analysis Complete!")
        print("Generating poetry...")
        result1 = Process.generate_firsttwo(tag_table, syl_rules)
        r1 = result1[1]
        r2 = result1[2]
        firsttwo = result1[0]
        result2 = Process.generate_lasttwo(tag_table, syl_rules,
                                           rhyme_pos_table, r1, r2)
        lasttwo = result2
        print("A poem about " + query + ":")
        print()
        print(firsttwo)
        print(lasttwo)

    except TwitterSearchException as e:  # take care of all those ugly errors if there are some
        print(e)
Ejemplo n.º 42
0
    def post(self):
        api_key = 'AIzaSyBOrP8QroOlqPw0bdOFjhXnEKB0ITXRX4o'
        search_name = self.request.get("map_name")
        search_name = search_name.replace(" ", "_")
        endpoint_url = "https://www.googleapis.com/youtube/v3/search?part=snippet&maxResults=6&order=relevance&q=" + search_name + "&safeSearch=strict&type=video&key=" + api_key

        response = urlfetch.fetch(endpoint_url)
        content = response.content

        response_as_json1 = json.loads(content)
        print(response_as_json1)
        list_url = []

        for thing in response_as_json1["items"]:
            list_url.append(thing["id"]["videoId"])
        name = self.request.get("map_name")
        tuo = TwitterUserOrder(name)
        ts = TwitterSearch(
            consumer_key='dKu6bH3B6kzjjQx8SQOZix1zm',
            consumer_secret=
            '0JeXDLbdGApPxoGc7X3KKDHfLSfz9nLtrfcnRNvMCQwW3MVYG1',
            access_token='3659983230-oAz1ASVWPfp9tA6rMKmLUy9KIbt01WEvqCwwf6z',
            access_token_secret='s4S30z2lb7TNy6UqULkfSnz1lJiAxvlaDTyECjFfIq27Z'
        )

        i = 6  # number of tweets to collect
        tsts = []
        # the `if i > 0` guard was always true, so iterate directly
        for tweet in ts.search_tweets_iterable(tuo):
            result = tweet['user']['screen_name'] + ": " + tweet['text']
            tsts.append(result)
            print(tsts)
            i = i - 1
            if i == 0:
                break
            print(i)
        r = requests.post(
            'https://stevesie.com/cloud/api/v1/endpoints/3cd58c09-c547-481e-a011-180097f61f49/executions',
            headers={
                'Token': '04e4dc3c-481c-462f-875d-4e8202874ec7',
            },
            json={
                'inputs': {
                    'session_id': '2229053416%3AERftJLIFsesnIt%3A5',
                    'username': name,
                    'max_id': '',
                },
                'proxy': {
                    'type': 'shared',
                    'location': 'nyc3',
                }
            },
        )

        response_json = r.json()
        img_url = response_json['object']['response']['response_text']
        img_url = str(img_url)
        # response_text is already a JSON string (the original replace was a
        # no-op), so parse it directly
        url = json.loads(img_url)
        url = url['items'][0]['image_versions2']['candidates'][0]['url']

        template_var = {
            'tsts': tsts,
            'url': url,
            'list_url': list_url,
        }
        result_template = the_jinja_env.get_template('templates/results2.html')
        self.response.write(result_template.render(template_var))
Ejemplo n.º 43
0
    non_sarcastic_tweets_list = []

    # create search orders
    if args.sarcastic_path:
        sarcastic_tso = create_sarcastic_search_order()
    if args.non_sarcastic_path:
        non_sarcastic_tso = create_non_sarcastic_search_order()

    try:
        # query twitter API and populate tweet lists
        ts = TwitterSearch(consumer_key=CONSUMER_KEY,
                           consumer_secret=CONSUMER_SECRET,
                           access_token=ACCESS_TOKEN,
                           access_token_secret=ACCESS_SECRET)
        if args.sarcastic_path:
            for sarcastic_tweet in ts.search_tweets_iterable(sarcastic_tso):
                if not sarcastic_tweet['full_text'].lower().startswith('rt'):
                    sarcastic_tweets_list.append({
                        'id': sarcastic_tweet['id'],
                        'urls': not not sarcastic_tweet['entities']['urls'],
                        'media': "media" in sarcastic_tweet["entities"],
                        'text': sarcastic_tweet['full_text']
                    })
        if args.non_sarcastic_path:
            for non_sarcastic_tweet in ts.search_tweets_iterable(
                    non_sarcastic_tso):
                if not non_sarcastic_tweet['full_text'].lower().startswith('rt'):
                    non_sarcastic_tweets_list.append({
                        'id': non_sarcastic_tweet['id'],
                        'urls': not not non_sarcastic_tweet['entities']['urls'],
                        'media': "media" in non_sarcastic_tweet["entities"],
                        'text': non_sarcastic_tweet['full_text']
                    })
Ejemplo n.º 44
0
class TweetCrawler:
    """ A class for tweet retrieval

    This class can be used to retrieve tweets around Cork or with relevant
    keywords
    The flag -g specifies, that tweets in a 20 mile radius around Fort Meagher
    are searched
    The flag -k specifies, that tweets with the keywords in CORK_KEYWORDS
    are searched
    """
    load_dotenv('.env')

    TWITTER_CONSUMER_KEY = os.getenv('TWITTER_CONSUMER_KEY')
    TWITTER_CONSUMER_SECRET = os.getenv('TWITTER_CONSUMER_SECRET')
    TWITTER_ACCESS_TOKEN = os.getenv('TWITTER_ACCESS_TOKEN')
    TWITTER_ACCESS_TOKEN_SECRET = os.getenv('TWITTER_ACCESS_TOKEN_SECRET')

    ANTALYA = {'keywords': ['Düden', '#Düden', '@antalyabb', 'from:antalyabb',
                            'to:antalyabb', 'Antalya', '#Antalya'],
               'geocode' : [36.852569, 30.782124],
               'name': 'Antalya'}

    ANTWERPEN = {'keywords': ['Antwerpen', '#Antwerpen', '@Stad_Antwerpen',
                              'to:Stad_Antwerpen', '@StadsLab2050', 'to:StadsLab2050',
                              '@DgplsAntwerpen', 'to:DgplsAntwerpen', '@DeZomer',
                              'to:DeZomer', '@LPAntwerpen', 'to:LPAntwerpen', '@PortofAntwerp',
                              'to:PortofAntwerp', '@SlimnaarA', 'to:SlimnaarA',
                              '@BZAntwerpen', 'to:BZAntwerpen', '@BusinessInA',
                              'to:BusinessInA'],
                 'geocode': [51.258820, 4.355700],
                 'name': 'Antwerpen'}

    CORK = {'keywords': ['Fort Meagher', 'Cork', 'Crosshaven', 'Camden Fort',
                         '#Cork', '#FortMeagher','#Crosshaven', '#lovecork',
                         '#purecork', '#visitCork', '@Corkcoco',
                         'from:Corkcoco', 'to:Corkcoco'],
            'geocode': [51.809083, -8.279279],
            'name': 'Cork'}

    THESSALONIKI = {'keywords': ['#Thessaloniki', 'Thessaloniki', '#Thermaikos',
                                 'Thermaikos', '@ThessalonikCity',
                                 'to:ThessalonikCity', '@AtThessaloniki'],
                    'geocode': [40.563893, 23.024136],
                    'name': 'Thessaloniki'}
    
    CITIES = [ANTALYA, ANTWERPEN, CORK, THESSALONIKI]

    def __init__(self):
        """
        Ensures, that the necessary access settings are set and creates
        TwitterSearchOrder instance for further search options
        """
        self.ts = TwitterSearch(
            consumer_key=self.TWITTER_CONSUMER_KEY,
            consumer_secret=self.TWITTER_CONSUMER_SECRET,
            access_token=self.TWITTER_ACCESS_TOKEN,
            access_token_secret=self.TWITTER_ACCESS_TOKEN_SECRET
        )
        self.tso = TwitterSearchOrder()

    def get_by_location(self, city):
        """
        Retrieves tweets in a 20 mile radius around Fort Meagher
        :param: Dict
        :return: None
        """
        # keywords serve as a placeholder, as empty keywords are not allowed in
        # TwitterSearch
        self.tso.set_keywords([" ", ".", ","], or_operator = True)
        self.tso.set_geocode(
            #self.CORK_GEOCODE_METRICS[0],
            #self.CORK_GEOCODE_METRICS[1],
            city['geocode'][0],
            city['geocode'][1],
            20,
            imperial_metric=True
            )
        self.get_tweets(city)

    def get_by_keywords(self, city):
        """
        Retrieves tweets according to the Keywords in CORK_KEYWORDS
        :return: None
        """
        #self.tso.set_keywords(self.CORK_KEYWORDS, or_operator = True)
        self.tso.set_keywords(city['keywords'], or_operator = True)
        self.get_tweets(city)

    def get_tweets(self, city):
        """
        Retrieves and saves tweets in a file.
        The options used for retrieval are saved in the tso object of the
        TweetCrawler instance
        :return: None
        """
        output_dir = 'outputs'
        if not os.path.isdir(output_dir):
            os.mkdir(output_dir)
        # file for the shortened csv
        filename = 'Tweets_' + city['name'] + '_' + datetime.now().strftime("%Y-%m-%d %H:%M:%S") \
                   + '.csv'
        # file for the json containing the entire retrieved information
        filename_raw = 'Tweets_'  + city['name'] + '_' + datetime.now().strftime("%Y-%m-%d %H:%M:%S") \
                      + '.json'

        try:
            self.tso.set_language('en')
            # this option ensures, that tweets are not truncated
            self.tso.arguments.update({'tweet_mode': 'extended'})
            # self.tso.set_include_entities(False) # provides entity information

            with open(os.path.join(output_dir, filename), 'w',
                      encoding='utf-8', newline='') as f:
                tweets = []
                csvWriter = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
                csvWriter.writerow(['user', 'tweet'])
                for tweet in self.ts.search_tweets_iterable(self.tso):
                    # write the strings directly; encoding to bytes would make
                    # Python 3's csv module emit b'...' literals
                    csvWriter.writerow([
                        tweet['user']['screen_name'],
                        tweet['full_text']
                    ])
                    # this is more of a workaround as using the noniterable
                    # option of TwitterSearch 'search_tweets(tso)' caps the
                    # amount of retrieved tweets
                    tweets.append(tweet)
                # writes the total number of retrieved tweets at the end
                csvWriter.writerow(['tweets', len(tweets)])
            print("Retrieved Tweets in: " + output_dir + "/" + filename)

            # writes the entire data collected into a .json file
            with open(os.path.join(output_dir, filename_raw), 'w') as f_raw:
                json.dump(tweets, f_raw)

        except TwitterSearchException as e:
            print(e)
        except Exception:
            print('Unexpected error: ' + str(sys.exc_info()[0]))
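A minimal sketch of the command-line driver implied by the -g/-k flags in the docstring; the argparse wiring is an assumption, while TweetCrawler, CITIES, get_by_location and get_by_keywords come from the class above:

if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Retrieve tweets per city')
    parser.add_argument('-g', action='store_true',
                        help='search tweets within a 20 mile radius of each city geocode')
    parser.add_argument('-k', action='store_true',
                        help='search tweets matching each city keyword list')
    args = parser.parse_args()

    crawler = TweetCrawler()
    for city in TweetCrawler.CITIES:
        if args.g:
            crawler.get_by_location(city)
        if args.k:
            crawler.get_by_keywords(city)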
Ejemplo n.º 45
0
    # FETCHING DATA FROM TWITTER
    try:

        ts = TwitterSearch(
            consumer_key='mgpSEJ1Fu9le1ND0iulsoWHaM',
            consumer_secret=
            'HMvPmooVtTmiJkfwixOORifcUZz1C442AbYz2Nodg8k0kFKjjP',
            access_token='1086082843476938753-6ax1DxzwPMrMfqmganAcnLJ31amKrI',
            access_token_secret='O7IT1RuvhO5KhhAF7Vf5uI5TekF3VaqrYjjosnH3nYwQ2'
        )

        tso = TwitterSearchOrder()
        tso.set_keywords([word])
        tso.set_language('pt')

        result = ts.search_tweets_iterable(tso)

        # STORING THE DATA
        cont = 0
        for tweet in result:
            writer.writerow({
                'name':
                str(tweet['user']['name']).translate(non_bmp_map),
                'screen_name':
                str(tweet['user']['screen_name']).translate(non_bmp_map),
                'location':
                str(tweet['user']['location']).translate(non_bmp_map),
                'followers_count':
                str(tweet['user']['followers_count']).translate(non_bmp_map),
                'friends_count':
                str(tweet['user']['friends_count']).translate(non_bmp_map),
Ejemplo n.º 46
0
def searchTweets(keywordLists=None,
                 keywords=None,
                 language=None,
                 geo_lat=None,
                 geo_lng=None,
                 geo_rad=None,
                 timeStart=None,
                 timeStop=None,
                 no_entities=False,
                 no_retweets=False,
                 no_links=False,
                 no_answers=False):
    tweetsFound = []
    tweetsCount = 0
    tso = TwitterSearchOrder()
    # remove all restrictions from previous calls:
    tso.remove_all_filters()
    # this makes sure no videos/pics are commented
    tso.set_keywords([
        "-video", "-pic", "-foto", "-funny", "-clip", "-vid", "-movie", "-song"
    ])  # append more synonyms and other languages TODO
    try:
        # note: re-creating the TwitterSearchOrder here would discard the
        # negative keywords set above, so keep working with the same object
        if keywordLists != None:
            for keywordList in keywordLists:
                tso.add_keyword(keywordList, or_operator=True)
        if keywords != None:
            for keyword in keywords:
                tso.add_keyword(keyword, or_operator=True)
        if language != None:
            tso.set_language(str(language))
        if geo_rad != None and geo_lat != None and geo_lng != None:
            tso.set_geocode(
                geo_lat, geo_lng, geo_rad, imperial_metric=True
            )  # must be of format: str(lat,lng,radius) + 'km'/'mi'
        if timeStart != None:
            tso.add_keyword(
                'since:' +
                str(timeStart))  # time has to be of the format: YYYY-MM-DD
        if timeStop != None:
            tso.add_keyword(
                'until:' +
                str(timeStop))  # time has to be of the format: YYYY-MM-DD
        if no_entities:
            tso.set_include_entities(False)
        if no_retweets:
            pass  #tso.set_include_rts(False) #TODO (see the sketch after this function)
        if no_links:
            pass  #TODO (see the sketch after this function)
        if no_answers:
            pass  #tso.set_exclude_replies(True) #TODO (see the sketch after this function)

        # Maybe use sentiment analysis? // tso.set_negative_attitude_filter()

        ts = TwitterSearch(consumer_key=consumer_key,
                           consumer_secret=consumer_secret,
                           access_token=access_token,
                           access_token_secret=access_token_secret)

        for tweet in ts.search_tweets_iterable(tso, callback=my_callback):
            #tweetsFound.append(tweet)
            tweetsCount += 1
            # write to .txt file
            with open(outputfile, 'a+') as outP:
                outP.write(str(tweet))
                outP.write('\n')
            # convert and write as geoJSON (the `with` blocks close the files,
            # so no explicit close() calls are needed):
            with open(outputgeo, 'a+') as outPgeo:
                outPgeo.write(format2geoJSON(tweet))
            print('@%s tweeted: %s\n' %
                  (tweet['user']['screen_name'], tweet['text']))

    except TwitterSearchException as e:
        print(e)
    except requests.exceptions.SSLError as e:
        print(e)

    return tweetsCount
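The no_retweets/no_links/no_answers TODOs above could be approximated with Twitter's standard search operators rather than dedicated library calls, using the same add_keyword method already used for since:/until:. A sketch, under the assumption that the search endpoint honors these operators:

# inside searchTweets, before the TwitterSearch object is created:
if no_retweets:
    tso.add_keyword('-filter:retweets')
if no_links:
    tso.add_keyword('-filter:links')
if no_answers:
    tso.add_keyword('-filter:replies')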
Ejemplo n.º 47
0
def get_twitter_data(ticker):
    if request.method == "POST":
        ticker = request.form["ticker"]
        return redirect(url_for("get_twitter_data", ticker=ticker))

    ############################################################################
    # Getting stock data
    import requests
    r = requests.get('https://finnhub.io/api/v1/quote?symbol=' + ticker + '&token=c0fjign48v6snribcmh0')
    print(r.json())
    closing_price = r.json()['c']
    ############################################################################
    ts = TwitterSearch(CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN_KEY, ACCESS_TOKEN_SECRET)

    search_obj = TwitterSearchOrder()
    search_obj.set_keywords([ticker, '$' + ticker, ticker + '$'])
    search_obj.set_language('en')
    search_obj.set_include_entities(False)
    search_obj.set_since(yesterday)
    search_obj.set_until(today)

    if ticker in saved_tickers:
        print(f'checking twitter for tweets of {ticker} (saved) ...')
        count_tweets = saved_tickers[ticker][today][0]
        percentage_sentiment = saved_tickers[ticker][today][1]
        sentiment = saved_tickers[ticker][today][2]
        reddit_mentions = saved_tickers[ticker][today][3]
        upvote_ratio = saved_tickers[ticker][today][4]

    else:
        count_tweets = 0
        print('checking twitter for tweets of {} ...'.format(ticker))
        for _ in ts.search_tweets_iterable(search_obj):
            count_tweets += 1

        print('there were {} tweets of {} today'.format(count_tweets, ticker))

        search_obj.set_negative_attitude_filter()
        count_negative = 0
        for _ in ts.search_tweets_iterable(search_obj):
            count_negative += 1

        print('according to twitter, there were {} negative tweets of {} today'.format(count_negative, ticker))

        search_obj.set_positive_attitude_filter()
        count_positive = 0
        for _ in ts.search_tweets_iterable(search_obj):
            count_positive += 1

        sum_of_sentiment = count_negative + count_positive
        if sum_of_sentiment != 0:
            percentage_sentiment = count_positive / sum_of_sentiment * 100
            if percentage_sentiment > 50:
                percentage_sentiment = str(int(percentage_sentiment)) + "%"
                sentiment = "positive"
            else:
                percentage_sentiment = str(int(percentage_sentiment)) + "%"
                sentiment = "negative"
        else:
            percentage_sentiment = "0%"
            sentiment = "positive"
        ############################################################################
        reddit = praw.Reddit(client_id='fR7cRCGQceQ3DQ',
                             client_secret='NFFZcftp18b64hnADKBlMsJ3n1eFEw',
                             user_agent='Stonks',
                             username='******',
                             password='******')

        subreddits = ['stocks',
                      'wallstreetbets',
                      ]
        subreddits = [reddit.subreddit(subreddits[i]) for i in range(len(subreddits))]
        reddit_mentions = search_ticker_mentions(ticker, subreddits, limit=200)
        upvote_ratio = search_ticker_upvotes(ticker, subreddits, limit=200)

        saved_tickers[ticker] = {today: [count_tweets, percentage_sentiment, sentiment, reddit_mentions, upvote_ratio]}

        print('according to twitter, there were {} positive tweets of {} today'.format(count_positive, ticker))

    datapoints = [(today, saved_tickers[ticker][today][0], saved_tickers[ticker][today][3])]
    for i in range(1, 11):
        new_date = today - timedelta(i)
        datapoints.append((new_date, saved_tickers[ticker][new_date][0], saved_tickers[ticker][new_date][3]))

    print(datapoints)
    print("length of datapoints = " + str(len(datapoints)))
    ############################################################################
    # TODO: add upvote ratio to reddit info
    print(saved_tickers)

    return render_template("ticker.html", ticker=ticker, count_tweets=count_tweets,
                           percentage_sentiment=percentage_sentiment,
                           sentiment=sentiment,
                           reddit_mentions=reddit_mentions,
                           upvote_ratio=upvote_ratio,
                           closing_price=closing_price,
                           datapoints=datapoints)
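For concreteness, the sentiment split above reduces to a simple ratio: if the attitude-filtered searches return count_positive = 30 and count_negative = 20, then percentage_sentiment = 30 / (30 + 20) * 100 = 60, which is reported as "60%" with sentiment "positive".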
Ejemplo n.º 48
0
def go_to_sleep(current_ts_instance):
  time.sleep(sleep_time)


for ln in lang:
    print(" working for language {0}".format(ln))
    result = []
    max_count = int(lang[ln])
    current_count = 0
    current_rt = 0  # reset the retweet counter for this language (max_rt is assumed to be defined above)
    ts = TwitterSearch(consumer_key='key', consumer_secret='key', access_token='key', access_token_secret='key')
    tso = TwitterSearchOrder()
    tso.set_keywords(keywords, or_operator = True)
    tso.set_language(ln)
    tso.set_include_entities(True)
    try:
        for tweet in ts.search_tweets_iterable(tso, callback = go_to_sleep):
            if "RT @" in tweet['text'] and current_rt <= max_rt:
              current_count = current_count + 1
              current_rt = current_rt + 1
              result.append(tweet)
            elif "RT @" not in tweet['text']:
              current_count = current_count + 1
              result.append(tweet)
            if current_count >= max_count:
              break
    except TwitterSearchException as e:
        print(e)
    file = open("data/{0}_{1}.json".format(type_name, ln), "w")
    file.write(json.dumps(result))
Ejemplo n.º 49
0
def main():

    try:
        tso = TwitterSearchOrder()

        with open('keywords', 'r') as kwFile:
            keywords = kwFile.readlines()

        tso.set_keywords([s.replace('\n', '') for s in keywords])

        conf = read_conf('conf')
        key = conf['key']
        secret = conf['secret']
        token = conf['token']
        token_secret = conf['token_secret']

        ts = TwitterSearch(
                consumer_key = key,
                consumer_secret = secret,
                access_token = token,
                access_token_secret = token_secret
                )

        search = ts.search_tweets_iterable(tso)

        # get previous request ids from file --DIRTY-- (see the cleaner sketch after this example)
        if not os.path.exists('previous'): open('previous', 'a').close()
        
        with open('previous', 'r') as previous:
            prev = set()
            for i in [s.replace('\n', '') for s in previous.readlines()]:
                prev.add(i)

        #print(search.keys())
        prev_write = open('previous','w')

        mail =  conf['mail']
        pwd = conf['pwd']
        s = smtplib.SMTP(conf['smtp_host'],int(conf['smtp_port']))
        s.starttls()
        s.login(mail,pwd)

        mail_text = ''

        for tweet in ts.search_tweets_iterable(tso):
            prev_write.write(str(tweet['id'])+'\n')
            if str(tweet['id']) not in prev:
                if len(tweet['entities']['urls']) > 0 and validate_tweet(tweet['text']):
                    #print(tweet)
                    mail_text = mail_text + '\n\nTWEET AT ' + tweet['created_at'] + '\n'
                    mail_text = mail_text + tweet['text']

        prev_write.close()

        if len(mail_text) > 0 :
            msg = MIMEText(mail_text.encode('utf-8'), 'plain', 'utf-8')
            msg['Subject'] = 'Latest #ICanHazPdf requests...'
            msg['From'] = mail
            msg['To'] = mail
            s.sendmail(mail,mail,msg.as_string())

        s.close()



    except TwitterSearchException as e:
        print(e)
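The '--DIRTY--' id cache above can be expressed more safely with a set and context managers, keeping the same one-id-per-line file format. A minimal sketch; load_previous_ids and save_ids are hypothetical names:

import os

def load_previous_ids(path='previous'):
    # ids of tweets already mailed on earlier runs
    if not os.path.exists(path):
        return set()
    with open(path) as f:
        return {line.strip() for line in f if line.strip()}

def save_ids(ids, path='previous'):
    with open(path, 'w') as f:
        for tweet_id in ids:
            f.write(str(tweet_id) + '\n')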
Ejemplo n.º 50
0
from TwitterSearch import *

try:
    tweets = TwitterSearch(consumer_key='XXXXXXXXX',
                           consumer_secret='XXXXX',
                           access_token='XXXXXXX',
                           access_token_secret='XXXXXXX')
    pesquisa = TwitterSearchOrder()
    pesquisa.set_keywords(['chatbot'])
    pesquisa.set_language('pt')

    for tweet in tweets.search_tweets_iterable(pesquisa):
        print('@{0} tweeted: {1}'.format(tweet['user']['screen_name'],
                                         tweet['text']))

except TwitterSearchException as e:
    print(e)
Ejemplo n.º 51
0
class TweepyBot:
    #init arrays and create api
    def __init__(self):
        self.potential_influencers = []
        self.twdata = {}
        with open(data_file, 'w') as f:
            f.write(json.dumps(self.twdata))
        self.auth = tweepy.OAuthHandler(auth_data['tw_auth']['api_key'],
                                        auth_data['tw_auth']['api_secret'])
        self.auth.set_access_token(auth_data['tw_auth']['access_token'],
                                   auth_data['tw_auth']['access_secret'])
        self.api = tweepy.API(self.auth)
        self.tso = TwitterSearchOrder()
        self.tso.set_language("en")
        self.tso.set_include_entities(False)
        self.ts = TwitterSearch(
            consumer_key=auth_data['tw_auth']['api_key'],
            consumer_secret=auth_data['tw_auth']['api_secret'],
            access_token=auth_data['tw_auth']['access_token'],
            access_token_secret=auth_data['tw_auth']['access_secret'])

#updates the engagement ratio
#also dumps the text of all tweets analyzed to a json file for ML and language processing

    def set_engagement_ratio(self, user):
        tweets = get_tweets(self.api, user)
        self.twdata = json.load(open(data_file))
        count = 0
        engagement_scores = []
        for t in tweets:
            # weighted engagement normalized by follower count; t.favorite_count
            # is the tweet's like count (t.user.favorite_count is the author's total)
            engagement_scores.append(
                ((likes_weight * t.favorite_count) +
                 (retweet_weight * t.retweet_count)) /
                t.user.followers_count)
            if t.id not in (v['id'] for v in self.twdata.values()):
                self.twdata[count] = {
                    'id': t.id,
                    'text': t.text,
                    'engagement_score': engagement_scores[-1]
                }
                count += 1
        return sum(engagement_scores) / len(engagement_scores)

    def get_tweets(self, user):
        statuses = self.api.user_timeline(screen_name=user, count=100)
        ret = []
        for status in statuses:
            ret.append(status._json['text'])
        return ret

    #searches tweets with a given str array of keywords
    #also dumps tweet data to mldata/twdata.json
    def search(self, keywords):
        self.tso.set_keywords(keywords)
        self.twdata = json.load(open(data_file))
        count = 0
        for tweet in self.ts.search_tweets_iterable(self.tso):
            if (tweet['user']['followers_count'] > follower_threshold):
                self.potential_influencers.append(tweet['user']['screen_name'])
                if tweet['id'] not in (v['id'] for v in self.twdata.values()):
                    self.twdata[count] = {
                        'id': tweet['id'],
                        'text': tweet['text'],
                        'engagement_score':
                        ((likes_weight * tweet['favorite_count']) +
                         (retweet_weight * tweet['retweet_count'])) /
                        tweet['user']['followers_count']
                    }
                    count += 1
        self.twdata = {
            k: v
            for k, v in sorted(self.twdata.items(),
                               key=lambda item: item[1]['engagement_score'])
        }  # sorts by engagement score (comparing the raw record dicts would raise TypeError)
        with open(data_file, 'w') as f:
            f.write(json.dumps(self.twdata))
        self.twdata.clear()  #for ram
Ejemplo n.º 52
0
def analysis_keyword(request, keyword):
    try:
        form = KeywordForm()
        format_tweets = []
        raw_tweets = []
        time_slot = 20
        pos_timeline = [0] * time_slot
        neg_timeline = [0] * time_slot
        time_timeline = []
        tso = TwitterSearchOrder()
        tso.set_keywords([keyword])
        tso.set_language('en')
        tso.set_count(100)
        tso.set_include_entities(False)
        count = 200
        i = 0
        start_time = datetime.max
        end_time = datetime.min

        ts = TwitterSearch(
            consumer_key='aUjZ7NR0b87m7lvC7NNFxmlQi',
            consumer_secret=
            'vCNYJLewRPhMrQ6q6x1B7vJcCq1PkdOywhS7ajCY5xu9vm0u5Z',
            access_token='2940098420-wvLU4OftzQmtMjqN5NLBt4lL5kMUF5ubx6K1Oli',
            access_token_secret='4xWNY899n4JTVDKPFBEWSB2uzMI72gVF6weXqFX1xu3ID'
        )
        # fetch
        for tweet in ts.search_tweets_iterable(tso):
            text = tweet['text']
            user = tweet['user']['screen_name']
            location = tweet['user']['location']
            created_at = tweet['created_at']
            raw_tweets.append([text, user, created_at, location])
            if i >= count - 1:
                break
            else:
                i += 1

        # tagging
        for tweet in raw_tweets:
            tag, pos_value, neg_value = tagger(tweet[0])
            if tag != 0:
                stime = time.strptime(tweet[2], "%a %b %d %H:%M:%S +0000 %Y")
                dt = datetime.fromtimestamp(mktime(stime))
                format_tweets.append(
                    [tweet[0], tweet[1], dt, tag, pos_value, neg_value])

        # statistics
        negative = 0
        for tweet in format_tweets:
            if tweet[3] == -1:
                negative += 1

        # generate timeline data
        for tweet in format_tweets:
            if tweet[2] < start_time:
                start_time = tweet[2]
            if tweet[2] > end_time:
                end_time = tweet[2]
        time_intvl = (end_time - start_time) / time_slot

        for tweet in format_tweets:
            # get_slot is defined elsewhere; a plausible sketch follows this example
            slot = get_slot(time_intvl, tweet[2], start_time) - 1
            if tweet[3] == 1:
                pos_timeline[slot] += 1
            else:
                neg_timeline[slot] += -1

        # format final timeline data
        for i in range(time_slot):
            if i % 4 == 0:
                timestr = (start_time + i * time_intvl).strftime('%H:%M:%S')
            else:
                timestr = ''
            time_timeline.append([timestr, pos_timeline[i], neg_timeline[i]])

        template = loader.get_template('classifier/alys_result.html')
        context = RequestContext(
            request, {
                'format_tweets': format_tweets,
                'len': len(format_tweets),
                'neg': negative,
                'pos': len(format_tweets) - negative,
                'keyword': keyword,
                'timeline': time_timeline,
                'form': form,
            })
        return HttpResponse(template.render(context))
    except TwitterSearchException as e:
        template = loader.get_template('classifier/error.html')
        context = RequestContext(request, {
            'e_str': str(e),
        })
        return HttpResponse(template.render(context))
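get_slot is not defined in this snippet; a minimal sketch consistent with how it is called above (a 1-based bucket index over time_slot equal intervals, clamped so that end_time lands in the last bucket) might look like this:

def get_slot(time_intvl, created_at, start_time, time_slot=20):
    # 1-based index of the interval a timestamp falls into
    if time_intvl.total_seconds() == 0:
        return 1
    slot = int((created_at - start_time) / time_intvl) + 1
    return min(slot, time_slot)  # clamp so end_time maps to the last slot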
Ejemplo n.º 53
0
    querystr = tso.create_search_url()

    tso2 = TwitterSearchOrder()
    tso2.set_search_url(querystr + '&exclude=retweets')

    ts = TwitterSearch(
        consumer_key='tjYkJwTKlpK40ZblE2XIqbI8a',
        consumer_secret='njaZM1pzG27wa9OXjd4DheeY3WARFNxy5UtHU1EzRdK0Tind6e',
        access_token='907999680503713792-Yz1pnFxt1hWSMzNGnjMS7vFARUvFEWg',
        access_token_secret='x2m3oZEkkR2dflLGUmXFtxGwj8r2mQvd6kBUizetmuNbI')

    def my_callback_closure(current_ts_instance):
        queries, tweets_seen = current_ts_instance.get_statistics()
        if queries > 0 and (queries % 5) == 0:
            time.sleep(60)

    for tweet in ts.search_tweets_iterable(tso2, callback=my_callback_closure):

        tagsraw = tweet.get('entities', {'hashtags': []}).get('hashtags', [])
        tags = []
        for tag in tagsraw:
            tags.append(tag.get('text'))
        append(file_path, tweet['user']['screen_name'],
               tweet['text'].replace('\n', '').replace('\r', ''),
               tweet['created_at'], tweet['retweet_count'],
               tweet['favorite_count'], tweet['user']['followers_count'], tags)

except TwitterSearchException as e:
    print(e)
Ejemplo n.º 54
0
    c1 -= 1
    return replies, since


api = tweepy.API(auth, wait_on_rate_limit=True)
print(
    "Enter the Screen Name of the political party whose tweets' reply tree you want to see (e.g. @BJP4India, @INCIndia, @AamAadmiParty)"
)
name = input()
#name = '@BJP4India'
reply_arr = []
non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)

ts = TwitterSearch(consumer_key=CONSUMER_KEY,
                   consumer_secret=CONSUMER_SECRET,
                   access_token=ACCESS_KEY,
                   access_token_secret=ACCESS_SECRET)
tuo = TwitterUserOrder(name)  # create a TwitterUserOrder
count = 0

for full_tweets in ts.search_tweets_iterable(tuo):
    count += 1
    c1 = 0
    reply_arr, id_tweet = Reply(full_tweets, c1)
    #print(tweet.text)

    print("Tweet :", full_tweets['text'].translate(non_bmp_map))
    print("No. of replies is ..", len(reply_arr[id_tweet]))
    for elements in reply_arr[id_tweet]:
        print("Replies :", elements)
    reply_arr.clear()
Ejemplo n.º 55
0
    tso.set_keywords(
        keywords, or_operator=True
    )  # let's define all words we would like to have a look for
    #tso.set_language('de') # we want to see German tweets only
    tso.set_include_entities(
        False)  # and don't give us all those entity information

    # it's about time to create a TwitterSearch object with our secret tokens
    ts = TwitterSearch(
        consumer_key='tJKFqoExyXrBlvXRW0Cpk9iUp',
        consumer_secret='YoEnqduMUAeRlBPK13sWgniP5eu0O8xHoXlTuw0Ht60ocpNTRB',
        access_token='341576524-jDpQev9B54G4DpEHAOfkDNSbpUzoU2wKpbaS1Tov',
        access_token_secret='QQ7glr2qYKWAd5kzMeM04rdrUFmDyagCHkaMziNH9diKW')

    # this is where the fun actually starts :)
    with open(
            '/home/jishnu/Documents/ISB/Term3/practicum/workspace/data_collection/twitter/data/tweets_{0}'
            .format(datetime.datetime.today().strftime('%Y%m%d')),
            'w') as fileout:
        #for tweet in ts.search_tweets_iterable(tso):
        #    fileout.write('@%s tweeted: %s' % ( tweet['user']['screen_name'], tweet['text'] ) )
        for idx, tweet in enumerate(ts.search_tweets_iterable(tso)):
            fileout.write(
                '###TWEET_NO:%d###!###USER:%s###!###DATE_TIME:%s###!###LOCATION:%s###!###TWEET:%s'
                % (idx, tweet['user']['screen_name'], tweet['created_at'],
                   tweet['user']['location'], tweet['text']))
            fileout.write('\n')

except TwitterSearchException as e:  # take care of all those ugly errors if there are some
    print(e)
Ejemplo n.º 56
0
def tweet_collect(hashstring,in_country,disaster):
    tweets_list=[]
    #time.sleep(2)
    try:
        search_string="#"+hashstring
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.set_language('en')
        tso.add_keyword(search_string)
        ts = TwitterSearch(
            consumer_key='LheTHAR7DSfhkIqYiBdlA',
            consumer_secret='F8uj4jsQv7THfjs1fUf7iLDTlgQUcneJIEhEXgO6A',
            access_token='1282607706-yAOJ6ZQ8zLJrTPK1LxzEJ4yfgU24EwHDK64LFAu',
            access_token_secret='UfpSAvmUyio0ydV2mn5kBz3fP7A6c5JHmlNyGizFVvYtG',
            verify=False
        )
        #print("Current rate-limiting status: %s" % ts.get_metadata()['x-rate-limit-remaining'])
        for tweet in ts.search_tweets_iterable(tso):
            #print(tweet)
            # append one JSON object per line (JSON Lines) so the file stays
            # parseable; the `with` block closes the file, so no explicit
            # close() is needed
            with open(twitter_raw_filename, 'a', encoding='utf-8') as f:
                json.dump(tweet, f, ensure_ascii=False)
                f.write('\n')
            tweet_dict={}
            tweet_dict["tweet_user_name"] = tweet['user']['name']
            tweet_dict["tweet_user_screen"] = tweet['user']['screen_name']
            tweet_dict["follower_count"] = tweet['user']['followers_count']
            tweet_dict["user_created"] = tweet['user']['created_at']
            tweet_dict["tweet_count"] = tweet['user']['statuses_count']
            tweet_dict["following_status"] = tweet['user']['following']
            tweet_dict["friends_count"] = tweet['user']['friends_count']
            tweet_dict["favourites_count"] = tweet['user']['favourites_count']
            tweet_dict["description"] = tweet['user']['description']
            tweet_dict["user_verified"] = tweet['user']['verified']
            tweet_dict["user_time_zone"] = tweet['user']['time_zone']
            tweet_dict["user_location"] = tweet['user']['location']
            tweet_dict["user_tweet"]=tweet['text']
            hash_tag_list = re.findall(r"#(\w+)", tweet['text'])
            hash_tag = ','.join(hash_tag_list)
            tweet_dict["user_created"]=tweet['created_at']
            tweet_dict["hash_tag"]=hash_tag
            clean_the_tag = re.compile('<.*?>')
            cleantext = re.sub(clean_the_tag, '', tweet['source'])
            tweet_dict["tweet_source"] = cleantext
            doc = nlp(tweet['text'])
            location_from_tweets_list=[]
            for token in doc.ents:
                # `and` binds tighter than `or`, so the original condition
                # matched every GPE regardless of the other clauses
                if token.label_ in ("GPE", "LOC"):
                    print(token.text, token.label_)
                    location_from_tweets_list.append(token.text)
            location_from_tweets=",".join(location_from_tweets_list)
            tweet_dict["location_from_tweets"] = location_from_tweets
            tweet_sentiment=TextBlob(tweet['text'])
            tweet_dict["tweet_sentiment_polarity"]=tweet_sentiment.polarity
            tweet_dict["tweet_sentiment_subjectivity"]=tweet_sentiment.subjectivity
            (tweet_dict["match_ratio"],tweet_dict["country_partial_match_ratio"],tweet_dict["token_sort_match_ratio"],tweet_dict["token_set_match_ratio"],)=match_score(tweet['text'],in_country+" "+disaster)
            (tweet_dict["location_from_tweets_match_ratio"],tweet_dict["location_from_tweets_country_partial_match_ratio"],tweet_dict["location_from_tweets_token_sort_match_ratio"],tweet_dict["location_from_tweets_token_set_match_ratio"],)=match_score(location_from_tweets,in_country)
            tweet_dict["input_disaster_location"]=in_country
            tweet_dict["input_disaster"]=disaster
            print(tweet_dict)
            tweets_list.append(tweet_dict)
        return tweets_list
    except TwitterSearchException as e:
        print(e)
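match_score is defined elsewhere in the original project; given the four ratio names it fills in, a plausible sketch using the fuzzywuzzy package (an assumption, not confirmed by this snippet) would be:

from fuzzywuzzy import fuzz

def match_score(text, target):
    # four complementary fuzzy string similarity scores, each 0-100
    return (fuzz.ratio(text, target),
            fuzz.partial_ratio(text, target),
            fuzz.token_sort_ratio(text, target),
            fuzz.token_set_ratio(text, target))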
Ejemplo n.º 57
0
def _do_lookup(potential_user, body):
    config = get_config()
    limit_of_tweets = int(args.limit)

    # Just in case the person using the program puts in ' or " in the config.
    consumer_key = config.get(
        'twitter',
        'consumer_key').replace(
        '\'', '').replace(
        '\"', '')
    consumer_secret = config.get(
        'twitter',
        'consumer_secret').replace(
        '\'', '').replace(
        '\"', '')
    access_token = config.get(
        'twitter',
        'access_token').replace(
        '\'', '').replace(
        '\"', '')
    access_token_secret = config.get(
        'twitter',
        'access_token_secret').replace(
        '\'', '').replace(
        '\"', '')

    if potential_user is None:
        print(colored("[*] It looks like OCR failed. Please make sure you " +
                      "crop the image as in sample and is readable.", 'red'))
        exit(1)

    try:
        tuo = TwitterUserOrder(potential_user[1:])
        ts = TwitterSearch(
            consumer_key=consumer_key,
            consumer_secret=consumer_secret,
            access_token=access_token,
            access_token_secret=access_token_secret
        )
        tweets = []
        for tweet in ts.search_tweets_iterable(tuo):
            # Nobody cares about re-tweets
            if 'RT ' not in tweet['text']:
                # compare the (text, id) tuple; the raw tweet dict would
                # never match the tuples stored in the list
                if (tweet['text'], tweet['id']) not in tweets:
                    tweets.append((tweet['text'], tweet['id']))
                if not limit_of_tweets:
                    break
                else:
                    limit_of_tweets -= 1

        # If none of that was found, let's report an OCR error
        if body is None:
            print(colored("[*] It looks like OCR failed.Please make sure you " +
                          "crop image as in sample and is readable.", 'red'))

        found_tweet = False
        # Check against every tweet pulled
        for tweet in tweets:
            removed_elements = 0
            ltweet, orig_len = tweet[0].split(' '), len(tweet[0].split(' '))
            # Compare each word of the OCR body against the tweet's words. TODO: Optimize
            for ele in body:
                if ele in ltweet:
                    removed_elements += 1
                    ltweet.remove(ele)
            removal_rate = (removed_elements / float(orig_len)) * 100

            if int(removal_rate) > 75:
                found_tweet = True
                print(colored("[*] It looks like this is a valid tweet",
                              'green'))
                print(colored("-> Confidence : " + "%.2f" % removal_rate + "%",
                              'green'))
                print(colored("-> Potential URL : https://twitter.com/" +
                              potential_user[1:] +
                              "/status/" + str(tweet[1]), 'green'))

            elif 55 <= int(removal_rate) <= 75:  # range check; the original membership test matched only exactly 55 or 75
                found_tweet = True
                print(colored("[*] This might be a valid tweet", 'yellow'))
                print(colored("-> Confidence : " + "%.2f" % removal_rate + "%",
                              'yellow'))
                print(colored("-> Potential URL : https://twitter.com/" +
                              potential_user[1:] +
                              "/status/" + str(tweet[1]), 'yellow'))

        if not found_tweet:
            print(colored("[*] I couldn't find a tweet like that. " +
                          "Try increasing the limit to pull more tweets",
                          'yellow'))

    except TwitterSearchException as e:  # catch all those ugly errors
        print(e)
Ejemplo n.º 58
0
import numpy as np
import string
from TwitterSearch import *  # library used
try:
    tuo = TwitterUserOrder(
        'dunfrey')  # account whose information we want to inspect
    ts = TwitterSearch(
        # Twitter API credentials
        consumer_key='IMJh4kjQLGDzUaT9t1v0RXm5Y',
        consumer_secret='cjt9d684CpvElXof1BxUMgSakNnFBVLDweQTSpGZolzzrnU8JE',
        access_token='968521944944529408-oI5NcJVaZellwrsPjhsQkQPDeAZJzKf',
        access_token_secret='hc7bTI65fG97smD3ZEB6iCjLrBzHBxn2Sp6TIaX8fZSJZ')

    s = ""  # variable used to accumulate the collected tweet text

    for tweet in ts.search_tweets_iterable(
            tuo):  # loop that fetches one tweet at a time
        texto = ('@%s tweeted: %s' %
                 (tweet['user']['screen_name'], tweet['text']))
        print(texto)  # print the tweet

        s += str(texto)  # append all collected text to a single variable

except TwitterSearchException as e:
    print(e)  # exception handling

text = s.lower()  # lower-case the collected text so it can be processed below

text = text.translate({ord(k): None for k in string.punctuation})
words = text.split()
wordset = set(words)
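The lower-cased, punctuation-free words and wordset built above lend themselves to a frequency count; a minimal continuation using collections.Counter:

from collections import Counter

word_counts = Counter(words)
for word, count in word_counts.most_common(10):
    print('%s: %d' % (word, count))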
Ejemplo n.º 59
0
'''complexity:
functions to analyse data
see if anyone tweets back'''




from TwitterSearch import *
try:                                                    
    TSearch = TwitterSearchOrder()                      #creating an object of the attributes of the searching guidelines
    #set_keywords() requires a list; ['python'] is a hypothetical placeholder ==== need the real list of keywords (attribute of an object)
    TSearch.set_keywords(['python'])
    TSearch.set_language('en')                          #set language of tweets to English
    TSearch.set_include_entities(True)                  #this is the info from the tweets ==== look up what info is involved

    #token used to connect to my twitter account
    T = TwitterSearch(
        consumer_key = '',
        consumer_secret = '',
        access_token = '',
        access_token_secret = '')

    for tweet in T.search_tweets_iterable(TSearch):
        #this is where the functions will be called to scrape data, maybe
        #to tweet back? (another API?) ==== research necessary APIs on GitHub
        print(tweet['user']['screen_name'] + tweet['text'])

except TwitterSearchException as e:
    print(e)
Ejemplo n.º 60
0
#     "to": "uri:twitter/user/{{ in_reply_to_user_id_str }}",
#     "author": "uri:twitter/user/{{ user['id_str'] }}",
#     "text": "{{ text }}",
#     "parent": "uri:twitter/tweet/{{ in_reply_to_user_id_str }}",
#     ## "mentions" is an array of the caliper IDs from the user_mentions objects array
#     "mentions": ["uri:twitter/user/{{ entities[user_mentions]['id_str'] }}", "..." ],
#     ## "hashtags" is an array of the hashtag texts included in the tweet entities
#     "hashtags": ["{{ entities[hashtags][text] }}", " "]
#   }
# }

# Open the variables
db_inserts = 0
caliper_tweet_object = {}
twitter_id_list = []
# ts and tso are created earlier in the original script (not shown in this snippet)
twitter_search = ts.search_tweets_iterable(tso)
tweet_sentiment = ''

# this is where the fun actually starts :)
for tweet in twitter_search:
    mentions_list = []
    hashtags_list = []
    tweet_id = ""
    # Create the caliper_tweet object
    caliper_tweet = {
        "context": "http://purl.imsglobal.org/ctx/caliper/v1/MessagingEvent",
        "type": "MessagingEvent",
        "startedAtTime": "",
        ## Can be used to query Twitter API for user information
        "actor": "",
        "verb": "tweetSent",