def SearchOnTwitter(keywords, language):
    """Search Twitter for tweets matching *keywords* in *language*.

    Parameters:
        keywords: list of strings the tweets must contain
        language: language code restricting the search (e.g. 'de', 'en')

    Returns:
        list of tweet texts; partial (possibly empty) if the search failed
    """
    collected = []
    try:
        # Build the search order: keywords, language filter, no entity payload.
        order = TwitterSearchOrder()
        order.set_keywords(keywords)
        order.set_language(language)
        order.set_include_entities(False)

        # NOTE(review): OAuth credentials are read from module-level globals.
        client = TwitterSearch(consumer_key=consumer_key,
                               consumer_secret=consumer_secret,
                               access_token=access_token,
                               access_token_secret=access_token_secret)

        # Append incrementally so tweets gathered before a mid-stream API
        # error are still returned.
        for status in client.search_tweets_iterable(order):
            collected.append(status['text'])
    except TwitterSearchException as err:
        # The library raises on HTTP/auth/rate-limit problems; report and
        # fall through with whatever was collected so far.
        print(err)
    return collected
def coleta_tweets():
    """Collect Portuguese 'Harry potter' tweets into a pandas DataFrame.

    Returns a single-column DataFrame of formatted tweet strings, or None
    when the TwitterSearch library raises an error.
    """
    try:
        # Client built with empty credential placeholders (to be filled in).
        client = TwitterSearch(consumer_key='',
                               consumer_secret='',
                               access_token='',
                               access_token_secret='')

        order = TwitterSearchOrder()
        order.set_keywords(['Harry potter'])
        order.set_language('pt')

        rows = []
        for status in client.search_tweets_iterable(order):
            # Same "@user tweeted: text," formatting as before
            # (trailing comma preserved).
            rows.append('@%s tweeted: %s' % (status['user']['screen_name'],
                                             status['text']) + ',')

        print('Coleta finalizada!')
        #df.to_csv('tweets.txt')
        return pd.DataFrame(rows)
    except TwitterSearchException as err:
        print(err)
def coleta_tweets():
    """Fetch Portuguese 'Harry potter' tweets and wrap them in a DataFrame.

    Returns the DataFrame on success; returns None after printing the error
    when the Twitter API call fails.
    """
    try:
        api = TwitterSearch(
            consumer_key='',
            consumer_secret='',
            access_token='',
            access_token_secret='',
        )
        consulta = TwitterSearchOrder()
        consulta.set_keywords(['Harry potter'])
        consulta.set_language('pt')

        # Format each tweet as "@user tweeted: text," (trailing comma kept,
        # matching the original output format).
        linhas = [
            '@%s tweeted: %s' % (t['user']['screen_name'], t['text']) + ','
            for t in api.search_tweets_iterable(consulta)
        ]
        print('Coleta finalizada!')
        #df.to_csv('tweets.txt')
        return pd.DataFrame(linhas)
    except TwitterSearchException as err:
        print(err)
def get_tweets(query):
    """Return up to query['count'] tweets matching query['query'].

    `query` is a dict-like object: 'query' holds space-separated keywords,
    'count' (default 5) caps the number of tweets returned.
    """
    from TwitterSearch import TwitterSearch, TwitterSearchOrder
    import itertools

    order = TwitterSearchOrder()
    order.set_keywords(query.get('query', '').split(' '))
    # tso.set_language('en')
    order.set_include_entities(False)  # plain text only, no entity metadata

    # Credentials come from the Flask application config.
    client = TwitterSearch(
        consumer_key=app.config.get('TWITTER_CONSUMER_KEY'),
        consumer_secret=app.config.get('TWITTER_CONSUMER_SECRET'),
        access_token=app.config.get('TWITTER_ACCESS_TOKEN'),
        access_token_secret=app.config.get('TWITTER_ACCESS_TOKEN_SECRET'),
    )

    limit = int(query.get('count', 5))
    return list(itertools.islice(client.search_tweets_iterable(order), 0, limit))
def getTweetsByWords(authdata, word, limit=100):
    """Collect up to *limit* tweet texts containing *word*.

    Parameters:
        authdata: dict with 'consumer_key', 'consumer_secret',
            'access_token' and 'access_token_secret' entries
        word: single search keyword
        limit: maximum number of tweets to gather (default 100)

    Returns:
        {'status': 'Task Completed', 'result': [tweet texts]}
    """
    order = TwitterSearchOrder()
    order.set_keywords([word])
    order.set_include_entities(False)

    client = TwitterSearch(consumer_key=authdata['consumer_key'],
                           consumer_secret=authdata['consumer_secret'],
                           access_token=authdata['access_token'],
                           access_token_secret=authdata['access_token_secret'])

    texts = []
    for index, status in enumerate(client.search_tweets_iterable(order)):
        if index == limit:
            break
        texts.append(status['text'])
        print(index)  # progress trace, kept from the original
    return {'status': 'Task Completed', 'result': texts}
def count_tweets_of_app(app_name):
    """
    Counts how many distinct users tweeted with both the app_name hashtag
    and COMPETITION_NAME inside the competition date window.

    Args:
        app_name: name of the app whose tweets are to be counted
    Returns:
        number of votes (distinct tweeting users), or -1 if the Twitter
        search failed
    """
    from TwitterSearch import TwitterSearchOrder, TwitterSearch, TwitterSearchException
    try:
        tso = TwitterSearchOrder()
        # Both terms must appear in the tweet.
        tso.set_keywords([check_hashtag(app_name), COMPETITION_NAME])

        ts = TwitterSearch(consumer_key=TWITTER_API_KEY,
                           consumer_secret=TWITTER_API_KEY_SECRET,
                           access_token=TWITTER_ACCESS_TOKEN,
                           access_token_secret=TWITTER_ACCESS_TOKEN_SECRET)

        # Use a set so each user is counted at most once (O(1) membership).
        users = set()
        for tweet in ts.search_tweets_iterable(tso):
            user = tweet['user']['id']
            if user not in users:
                # Twitter timestamp format, e.g. "Wed Aug 27 13:08:45 +0000 2008"
                # (see https://dev.twitter.com/overview/api/tweets).
                time_tweet = datetime.datetime.strptime(
                    tweet['created_at'], '%a %b %d %H:%M:%S +0000 %Y')
                # FIX: the original used bitwise `&` between the two
                # comparisons; a chained comparison is the correct boolean
                # idiom (same result here, clearer intent).
                if COMPETITION_START_DATE < time_tweet < COMPETITION_END_DATE:
                    users.add(user)
        return len(users)
    except TwitterSearchException as e:
        # API/auth/rate-limit failure: report it and signal "count
        # unavailable" with -1, as documented above.
        print(e)
        return -1
class TwitterScrape:
    """Methods to gather data from twitter searches"""

    def __init__(self):
        # Login to twitter handle using oauth.
        # SECURITY NOTE(review): credentials are hard-coded; move them into
        # configuration/environment variables and rotate these keys.
        self.twitter = TwitterSearch(
            consumer_key='PYX15cyo7pBYyrny2kXomGf4N',
            consumer_secret='mCMtxofBFLtJv1GVRXeB9w0pw64ObRDPGmIZEGRo3uyl1oPVci',
            access_token='3369817647-TTV9HTaWAIbvrbpJwgXkVQtm0akEMSihl43No3P',
            access_token_secret='WjxjNW8YWmRSL65eIYYhQd4DsBSECO7wKpZpKsfLcR99q'
        )

    def search(self, query, lang='en', n=10**5):
        """
        Search twitter for specified query. Function returns n tweets or as
        many as can be found for that query.

        Parameters:
        query -- Search query (String)
        lang -- Specify language of tweets, optional, default: 'en' (String)
        n -- Number of tweets to return, optional, default: 10**5 (Int)
             (FIX: docstring previously said 10**3, contradicting the
             signature)

        Returns:
        tweets_out -- Pandas series of tweets of length up to n
        """
        # Initialise container
        tweets_out = []

        # Setup twitter search
        tso = TwitterSearchOrder()
        tso.set_keywords([query])
        tso.set_language(lang)
        tso.set_include_entities(False)

        # Begin search; emit a progress marker every 100 tweets.
        sys.stdout.write("Tweet number out of {0}: ".format(n))
        for i, tweet in enumerate(self.twitter.search_tweets_iterable(tso)):
            # Break from loop when n tweets are reached
            if i == n:
                break
            if i % 100 == 0:
                sys.stdout.write('{0} '.format(i))
                sys.stdout.flush()
            tweets_out.append('%s' % (tweet['text']))
        # FIX: bare `print` is a no-op expression in Python 3; call it so the
        # progress line is terminated with a newline.
        print()

        # Return as pandas series as it's easier to work with
        return pd.Series(tweets_out)
def hello_world(keywords):
    """Search Twitter for *keywords* and return a CORS-enabled JSON response.

    Returns {'status': 200, 'results': [...tweet texts...]}; the result list
    is empty when the search fails.
    """
    response = make_response()
    # Allow any origin/header/method (public endpoint).
    response.headers.add("Access-Control-Allow-Origin", "*")
    response.headers.add("Access-Control-Allow-Headers", "*")
    response.headers.add("Access-Control-Allow-Methods", "*")
    # FIX: initialise before the try block — previously, if TwitterSearch
    # setup raised, `tweets` was never bound and the jsonify call below
    # crashed with NameError instead of returning an empty result.
    tweets = []
    try:
        tso = TwitterSearchOrder()
        tso.set_keywords([keywords])
        ts = TwitterSearch(keys)
        for tweet in ts.search_tweets_iterable(tso):
            tweets.append(tweet['text'])
    except TwitterSearchException as e:
        print(e)
    response = jsonify({'status': 200, 'results': tweets})
    return response
class TwitterSearchLastWeekIterator(TwitterSearchIterator):
    """Iterates over tweets matching `search_query` via the TwitterSearch API.

    OAuth credentials are pulled from the SEARCHTWEETS_* environment
    variables at construction time.
    """

    def __init__(self, search_query):
        self.search_query = search_query
        self.library = TwitterSearch(
            consumer_key=os.getenv("SEARCHTWEETS_CONSUMER_KEY"),
            consumer_secret=os.getenv("SEARCHTWEETS_CONSUMER_SECRET"),
            access_token=os.getenv("SEARCHTWEETS_ACCESS_TOKEN"),
            access_token_secret=os.getenv("SEARCHTWEETS_ACCESS_TOKEN_SECRET")
        )
        # The query object knows how to express itself as a search order.
        order = self.search_query.create_twitter_search_order()
        self.iter = iter(self.library.search_tweets_iterable(order))

    def __iter__(self):
        return self

    def __next__(self):
        # Delegate straight to the underlying tweet iterator.
        return next(self.iter)
def get(self, user_handle=None):
    """Return up to `numTweets` (default 20) recent tweets for a handle.

    Responds with status 400 when no handle is given, 404 when the Twitter
    lookup fails, 200 with the tweet list otherwise.
    """
    from TwitterSearch import TwitterSearch, TwitterUserOrder, TwitterSearchException

    if user_handle is None:
        return jsonify({
            'response': [],
            'status': 400,
            'message': 'No handle provided'
        })

    try:
        import itertools

        timeline_order = TwitterUserOrder(user_handle)

        # SECURITY NOTE(review): API keys are hard-coded below; they should
        # be moved into configuration/environment variables and rotated.
        consumer = 'CedAugFXME85jW5MRraKTJFgO'
        consumer_secret = 'RjLOp02iZqQnGM5cOt4bBeFjFHtFyVW09NSH14rVEyPouFvWLs'
        access = '378294925-zdTFn1Gf8rcBzv6gshfjfONZG9ZSc8QFUlZd1YO8'
        access_secret = '0MV9lR9kFdoUkLnKoWgdZCl74vunMAoCR7INC7pQYrSfW'

        client = TwitterSearch(consumer_key=consumer,
                               consumer_secret=consumer_secret,
                               access_token=access,
                               access_token_secret=access_secret)

        # The `numTweets` query parameter caps the result; defaults to 20.
        num_tweets = int(request.args.get('numTweets', 20))
        timeline = client.search_tweets_iterable(timeline_order)
        resolved_tweets = list(itertools.islice(timeline, num_tweets))
        return jsonify({'response': resolved_tweets, 'status': 200})
    except TwitterSearchException as e:
        return jsonify({
            'response': [],
            'status': 404,
            'message': 'There was a problem fetching the data for {}: {}'.format(
                user_handle, e)
        })
def getTweets(politician_id, searchOnlySexistWords):
    """Fetch tweets mentioning a politician, optionally paired with sexist words.

    When searchOnlySexistWords is True, every configured sexist word is
    combined with every name variant and an unbounded iterable of matches is
    returned.  When it is exactly False, the plain names are searched and a
    single page (~100 statuses) is returned.  Returns None when the Twitter
    library raises.
    """
    try:
        politician = Politician.objects.get(id=politician_id)
        politician_names = [
            politician.first_name + " " + politician.last_name,
            politician.username
        ]

        tso = TwitterSearchOrder()
        searchTerms = []
        if searchOnlySexistWords:
            # Pair each sexist word with each name variant.
            sexistWords = CONFIG['SEXISTWORDS']
            for word in sexistWords:
                for politician_name in politician_names:
                    searchTerms.append(word + ' ' + politician_name)
        elif searchOnlySexistWords is False:
            # NOTE: only the exact value False selects the plain-name search;
            # other falsy values leave searchTerms empty (behavior preserved).
            searchTerms = politician_names

        tso.set_keywords(searchTerms, or_operator=True)
        tso.set_language("en")
        tso.set_include_entities(False)

        # Re-issue the generated URL with tweet_mode=extended so full,
        # non-truncated tweet bodies come back.
        querystr = tso.create_search_url()
        tso.set_search_url(querystr + "&tweet_mode=extended")

        ts = TwitterSearch(consumer_key=CONFIG['CONSUMER_KEY'],
                           consumer_secret=CONFIG['CONSUMER_SECRET'],
                           access_token=CONFIG['ACCESS_TOKEN'],
                           access_token_secret=CONFIG['ACCESS_TOKEN_SECRET'])

        print("**Processing tweets for " +
              str(politician.first_name + " " + politician.last_name) + "**")

        if searchOnlySexistWords:
            return ts.search_tweets_iterable(tso)
        # Single request otherwise — limited to roughly 100 tweets.
        tweets = ts.search_tweets(tso)
        return tweets['content']['statuses']
    except TwitterSearchException as e:
        logging.exception("Unable to get new tweets because of" + str(e))
def getStats(self, url, proxy, headers, timeout):
    """Return the summed retweet + favorite counts of tweets containing *url*.

    Parameters:
        url: string that may appear in tweets
        proxy: 'ip:port' proxy string forwarded to the Twitter client
        headers: dict that should contain a user-agent entry
        timeout: maximum seconds to wait for a response (int)

    NOTE(review): `headers` and `timeout` are accepted but never forwarded
    to the TwitterSearch client — confirm whether that is intentional.
    """
    query = TwitterSearchOrder()
    query.set_search_url('q=' + url)
    query.set_result_type(result_type='mixed')
    query.set_include_entities(False)
    query.set_count(100)  # maximum page size per request

    api = TwitterSearch(consumer_key=self.ConsumerKey,
                        consumer_secret=self.ConsumerSecret,
                        access_token=self.AccessTokenKey,
                        access_token_secret=self.AccessTokenSecret,
                        proxy=proxy)

    # Accumulate engagement counts across all matching tweets.
    return sum(t['retweet_count'] + t['favorite_count']
               for t in api.search_tweets_iterable(query))
def search(self):
    """Run the configured search, appending each tweet's text to self.data
    and persisting it via self.save_line.

    Twitter API errors are caught and printed rather than propagated.
    """
    try:
        tso = TwitterSearchOrder()
        # FIX: set_keywords expects the list itself. The original unpacked
        # it (`*self.search_terms`), which passed the second term into the
        # `or_operator` parameter and raised TypeError for three or more
        # terms.
        tso.set_keywords(self.search_terms)
        tso.set_include_entities(False)
        tso.set_count(100)  # maximum page size per request

        # SECURITY NOTE(review): hard-coded credentials — move to config.
        ts = TwitterSearch(
            consumer_key='aOUVcCWLIYEbUvHW5dLjVc7Gf',
            consumer_secret='8qb3LTAHbj43J40Rxm0RMLAOaP4QoEHfFVGTeJ3S6iUmSBq6JJ',
            access_token='4251433696-ulZx8dJ3QZE95ds0PhXNldeKFhjhBUoGSuGycSE',
            access_token_secret='wx65NQaBHHgwC4xLOgRxFSs4kWWzkg09KkgNkAKHZryks'
        )
        for tweet in ts.search_tweets_iterable(tso):
            self.data.append(tweet['text'])
            self.save_line(tweet['text'])
    except TwitterSearchException as exception:
        # Report API/auth/rate-limit failures without aborting the caller.
        print(exception)
def getTweets(politician_id):
    """Search recent English tweets pairing sexist words with a politician's
    name variants.

    Returns a TwitterSearch iterable of matching tweets, or None (after
    logging) when the search library raises.
    """
    try:
        politician = Politician.objects.get(id=politician_id)
        politician_names = [politician.first_name + " " + politician.last_name,
                            politician.last_name,
                            politician.username]
        print("Getting Tweets for " +
              str(politician.first_name + " " + politician.last_name))

        tso = TwitterSearchOrder()
        sexistWords = ['bitch', 'skank', 'rape']
        searchTerms = []
        # FIX: the inner loop variable was previously named `politician`,
        # shadowing (and clobbering) the Politician model instance fetched
        # above; renamed to `name`.
        for word in sexistWords:
            for name in politician_names:
                searchTerms.append(word + ' ' + name)
        tso.set_keywords(searchTerms, or_operator=True)
        print(searchTerms)
        tso.set_language("en")
        tso.set_include_entities(False)

        # Re-issue the generated URL with tweet_mode=extended so full,
        # non-truncated tweet bodies come back.
        querystr = tso.create_search_url()
        tso.set_search_url(querystr + "&tweet_mode=extended")

        # Environment variables win; CONFIG provides the fallback values.
        ts = TwitterSearch(
            consumer_key=os.environ.get('CONSUMER_KEY', CONFIG['CONSUMER_KEY']),
            consumer_secret=os.environ.get('CONSUMER_SECRET', CONFIG['CONSUMER_SECRET']),
            access_token=os.environ.get('ACCESS_TOKEN', CONFIG['ACCESS_TOKEN']),
            access_token_secret=os.environ.get('ACCESS_TOKEN_SECRET', CONFIG['ACCESS_TOKEN_SECRET'])
        )
        return ts.search_tweets_iterable(tso)
    except TwitterSearchException as e:
        logging.exception("Unable to get new tweets because of" + str(e))
tso.set_since_id(latest_id_int) print 'Since the document count in the ' + keyword + ' collection is above 0, the since_id uses the parameter of the latest tweet so that only new tweets are collected.' else: print 'No documents exist in the ' + keyword + ' collection right now so the since_id parameter will be empty and all tweets will be collected.' # Create a TwitterSearch object with our secret tokens ts = TwitterSearch( consumer_key = config.get('Twitter', 'consumer_key'), consumer_secret = config.get('Twitter', 'consumer_secret'), access_token = config.get('Twitter', 'access_token'), access_token_secret = config.get('Twitter', 'access_token_secret') ) # Perform the search twitter_search = ts.search_tweets_iterable(tso) # Start the insert count variable db_inserts = 0 # this is where the fun actually starts :) try: for tweet in twitter_search: if db_inserts < count: mentions_list = [] hashtags_list = [] # Create the caliper_tweet object caliper_tweet = { "context": "http://purl.imsglobal.org/ctx/caliper/v1/MessagingEvent", "type": "MessagingEvent", "startedAtTime": "",
def Tweets():
    """Collect recent tweets per company — a hashtag search plus the
    company's own timeline — into per-company DataFrames.

    Reads the module-level MainDF (assumes 'company', 'twitter' and
    'hashtag' columns — TODO confirm), writes DataFrames into the
    module-level `tweets_datasets` dict and appends processed company names
    to `tw_current_companies`.  Errors from the Twitter library are printed.
    """
    try:
        max_feeds = 10  # cap on timeline tweets kept per company
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.set_language('en')
        tso.set_include_entities(
            False)  # and don't give us all those entity information
        tso.set_until(new_date)  # only tweets up to this module-level date
        # Ask the API for full-length (non-truncated) tweet bodies.
        tso.arguments.update({'tweet_mode': 'extended'})
        tso.arguments.update({'truncated': 'False'})
        # Client with empty credential placeholders, routed via a proxy.
        ts = TwitterSearch(consumer_key='',
                           consumer_secret='',
                           access_token='',
                           access_token_secret='',
                           proxy='http://proxy_address')
        for c in range(len(MainDF)):
            count = 0
            #kw=[MainDF['twitter'][c]]
            #for h in MainDF['hashtag'][c]:
            #    kw.append(h)
            tso.set_keywords(MainDF['hashtag'][c])
            tweets_list = []
            # Separate order for the company's own timeline.
            tuo = TwitterUserOrder(MainDF['twitter'][c])
            # tuo.set_language('en')
            tuo.set_include_entities(
                False)  # and don't give us all those entity information
            # tuo.set_until(days_ago)
            # tuo.set_count(15)
            tuo.arguments.update({'tweet_mode': 'extended'})
            tuo.arguments.update({'truncated': 'False'})
            # Hashtag search: retweets are skipped, originals collected.
            for tweet in ts.search_tweets_iterable(tso):
                if 'retweeted_status' in tweet:
                    None
                    #tweets_list.append([tweet['user']['screen_name'],tweet['retweeted_status']['full_text'],'Retweet of ' + tweet['retweeted_status']['user']['screen_name']])
                else:
                    # Extract any URLs from the tweet body.
                    links = Find(tweet['full_text'])
                    links = ', '.join(link for link in links)
                    #print(tweet)
                    tweets_list.append([
                        MainDF['company'][c], tweet['user']['screen_name'],
                        tweet['full_text'], tweet['created_at'], links
                    ])
            # Timeline: keep English tweets only, up to max_feeds of them.
            for tweet in ts.search_tweets_iterable(tuo):
                if tweet['lang'] != 'en':
                    #print(tweet)
                    None
                else:
                    # print(tweet)
                    links = Find(tweet['full_text'])
                    links = ', '.join(link for link in links)
                    tweets_list.append([
                        MainDF['company'][c], tweet['user']['screen_name'],
                        tweet['full_text'], tweet['created_at'], links
                    ])
                    count = count + 1
                    if count == max_feeds:
                        break
            if tweets_list != []:
                # Wrap this company's rows in a DataFrame and label it.
                tweets_datasets[MainDF['company'][c]] = pd.DataFrame(
                    tweets_list)
                tweets_datasets[MainDF['company'][c]].columns = [
                    'Company', 'Source/User', 'Title/Tweet', 'Date', 'Link'
                ]
                tweets_datasets[MainDF['company'][c]].insert(
                    0, 'Category', 'Twitter')
                # Normalise the created_at strings to plain date objects.
                for i in range(
                        len(tweets_datasets[MainDF['company'][c]]['Date'])):
                    tweets_datasets[MainDF['company'][c]]['Date'][i] = parse(
                        tweets_datasets[MainDF['company'][c]]['Date'][i])
                    tweets_datasets[
                        MainDF['company'][c]]['Date'][i] = tweets_datasets[
                            MainDF['company'][c]]['Date'][i].date()
                #print(datasets[companies_names[count]])
                tw_current_companies.append(MainDF['company'][c])
            else:
                None
                #tweets_list.append()
        #print( '@%s tweeted: %s' % ( tweet['user']['screen_name'], tweet['text'] ) )
    except TwitterSearchException as e:
        # take care of all those ugly errors if there are some
        print(e)
document_ids = tuple(filter(lambda id_: not id_.startswith('_'), database)) if len(document_ids) > 0: # If we already have imported tweets, we should continue with the oldest # tweet we know and work our way to older tweets from there. # We do that by setting the max_id query parameter to the oldest tweet # we know. oldest_id = min(document_ids) twitter_query.set_max_id(int(oldest_id)) print('Continuing initial import from tweet {}'.format(oldest_id)) else: print('Starting initial import on fresh database.') try: # Start making requests to the twitter API by searching tweets with our # twitter query. twitter_result_stream = twitter_connection.search_tweets_iterable( twitter_query) except TwitterSearchException as exc: if exc.code == 429: # Twitter has responded with a "429 Too Many Requests" error. # That means we made more requests than twitter allows us to do. # See: https://developer.twitter.com/en/docs/basics/rate-limiting # We now wait for 100 seconds and then try again until we can make # requests again. # We use tqdm for displaying the sleep progress. for second in tqdm(range(100), 'Sleep because of rate limit'): sleep(1) # sleep for 1 second continue else: # If it is another exception, re-raise the exception so that it is # displayed and aborts the import. raise
def collect_tweets(keyword, count, force=False):
    """Collect up to `count` new tweets matching `keyword`, score each with
    AlchemyAPI sentiment, and insert Caliper-shaped documents into a MongoDB
    collection named after the keyword.

    Python 2 code (print statements, `unicode`, ConfigParser).

    Parameters:
    - keyword: search term; a leading '.' stands in for '#' on the CLI
    - count: maximum number of documents to insert this run
    - force: when True, continue with placeholder sentiment values after the
      AlchemyAPI daily limit is exceeded instead of raising
    """
    from TwitterSearch import TwitterSearch
    from TwitterSearch import TwitterSearchOrder
    import pymongo
    from dateutil.parser import parse
    from alchemyapi import AlchemyAPI
    import ConfigParser

    # (Historic sys.argv parsing for keyword/count/-f removed to comments in
    # the original; arguments are now taken as function parameters.)

    # Read the config file for config variables
    config = ConfigParser.RawConfigParser()
    config.read('config.cfg')
    mongo_url = config.get('Mongo', 'db_url')

    # Connect to the Mongo database using MongoClient
    client = pymongo.MongoClient(mongo_url)
    db = client.get_default_database()

    # Access/create the collection based on the command line argument
    tweets = db[keyword]

    # Generate the alchemyapi variable
    alchemyapi = AlchemyAPI()

    # To accommodate for hashtags the user can substitute a . for the # on
    # the command line; restore it here.
    # NOTE(review): `is "."` compares identity, not equality — it only works
    # via CPython string interning and should be `== "."`.
    if keyword[0] is ".":
        keyword = keyword.replace('.', '#')

    # Avoid duplicate work: if the collection already has documents, find
    # the latest stored tweet id and use it as since_id below.
    db_count = tweets.count()
    # NOTE(review): `is not 0` relies on CPython small-int caching; should
    # be `!= 0`.
    if db_count is not 0:
        latest_id = tweets.find( {}, { 'object.tweet_id':1 } ).sort("startedAtTime").limit(1)
        latest_id_str = latest_id[db_count-1]['object']['tweet_id']
        latest_id_int = int(latest_id_str)
        print 'Count of documents in the ' + keyword + ' collection is not 0. It is ' + str(db_count) + '. Mongo is now identifying the latest tweet ID to append as a parameter to the API call.'
    # If there are no documents in the collection, no queries are done, and
    # the since_id is left out of the API call.
    else:
        print 'The Mongo collection ' + keyword + ' is empty. The script will now collect all tweets.'

    # create a TwitterSearchOrder object
    tso = TwitterSearchOrder()
    # let's define all words we would like to have a look for
    tso.set_keywords([keyword])
    # Select language
    tso.set_language('en')
    # Include Entity information
    tso.set_include_entities(True)

    if db_count is not 0:
        tso.set_since_id(latest_id_int)
        print 'Since the document count in the ' + keyword + ' collection is above 0, the since_id uses the parameter of the latest tweet so that only new tweets are collected.'
    else:
        print 'No documents exist in the ' + keyword + ' collection right now so the since_id parameter will be empty and all tweets will be collected.'

    # Create a TwitterSearch object with our secret tokens
    ts = TwitterSearch(
        consumer_key = config.get('Twitter', 'consumer_key'),
        consumer_secret = config.get('Twitter', 'consumer_secret'),
        access_token = config.get('Twitter', 'access_token'),
        access_token_secret = config.get('Twitter', 'access_token_secret')
    )

    # Perform the search
    twitter_search = ts.search_tweets_iterable(tso)

    # Start the insert count variable
    db_inserts = 0

    # this is where the fun actually starts :)
    try:
        for tweet in twitter_search:
            if db_inserts < count:
                mentions_list = []
                hashtags_list = []
                # Create the caliper_tweet object (Caliper MessagingEvent
                # skeleton; fields are filled in below).
                caliper_tweet = {
                    "context": "http://purl.imsglobal.org/ctx/caliper/v1/MessagingEvent",
                    "type": "MessagingEvent",
                    "startedAtTime": "",
                    ## Can be used to query Twitter API for user information
                    "actor": "",
                    "verb": "tweetSent",
                    "object": {
                        "type": "MessagingEvent",
                        "tweet_id": "",
                        "tweet_uri": "",
                        "subtype": "tweet",
                        ## "to" is derived below from in_reply_to_user_id_str:
                        ## when not null it becomes a twitter intent user URI
                        "to": "",
                        "author": {
                            "author_uri": "",
                            "author_alias": "",
                            "author_name": "",
                        },
                        "text": "",
                        "sentiment": {
                            "type": "",
                            "score": "",
                            "color": ""
                        },
                        "parent": "",
                        ## "user_mentions" is an array of caliper IDs from the
                        ## user_mentions objects array
                        "user_mentions": [],
                        ## "hashtags" is an array of the hashtag texts included
                        ## in the tweet entities
                        "hashtags": []
                    }
                }

                # Set the re-usable variables
                tweet_text = tweet['text']

                ## AlchemyAPI Sentiment Analysis
                tweet_sentiment = ''
                response = alchemyapi.sentiment('text', tweet_text)
                if 'docSentiment' in response.keys():
                    if 'score' in response['docSentiment']:
                        tweet_sentiment_score = response['docSentiment']['score']
                        tweet_sentiment_score = float(tweet_sentiment_score)
                        tweet_sentiment_score = round(tweet_sentiment_score, 2)
                    else:
                        # Neutral sentiment carries no score field.
                        tweet_sentiment_score = 0
                    tweet_sentiment_type = response['docSentiment']['type']
                    tweet_sentiment_score_a = abs(tweet_sentiment_score)
                    # Green for positive, red for negative; alpha channel
                    # scales with the score magnitude.
                    if (tweet_sentiment_score) > 0:
                        tweet_sentiment_color = "rgba(0,255,0," + str(tweet_sentiment_score_a) + ")"
                    else:
                        tweet_sentiment_color = "rgba(255,0,0," + str(tweet_sentiment_score_a) + ")"
                elif force == True:
                    # API limit reached but the caller asked to keep going.
                    print 'Force option set to true. The tweet_sentiment object will be set with API Limit Exceeded values.'
                    tweet_sentiment_type = 'API Limit Exceeded'
                    tweet_sentiment_score = 0
                    tweet_sentiment_color = 'rgba(0,0,0,0)'
                else:
                    e_alchemy_api_limit = 'Alchemy API daily limit exceeded. Retry search with force=True to continue'
                    raise Exception(e_alchemy_api_limit)

                # Fill in the Caliper document from the raw tweet.
                ds = tweet['created_at']
                tweet_date = parse(ds)
                caliper_tweet['startedAtTime'] = tweet_date
                caliper_tweet['actor'] = 'student:' + tweet['user']['screen_name']
                caliper_tweet['object']['tweet_uri'] = 'https://twitter.com/' + tweet['user']['screen_name'] + '/status/' + tweet['id_str']
                caliper_tweet['object']['tweet_id'] = tweet['id_str']
                # "to"/"parent" depend on whether this tweet is a reply.
                if tweet['in_reply_to_user_id_str'] is None:
                    caliper_tweet['object']['to'] = 'NoReply'
                    caliper_tweet['object']['parent'] = 'NoReply'
                else:
                    caliper_tweet['object']['to'] = 'https://twitter.com/intent/user?user_id=' + tweet['in_reply_to_user_id_str']
                    if tweet['in_reply_to_status_id_str'] is None:
                        caliper_tweet['object']['parent'] = 'None'
                    else:
                        caliper_tweet['object']['parent'] = 'https://twitter.com/' + tweet['user']['screen_name'] + '/status/' + tweet['in_reply_to_status_id_str']
                caliper_tweet['object']['author']['author_uri'] = 'https://twitter.com/intent/user?user_id=' + tweet['user']['id_str']
                caliper_tweet['object']['author']['author_alias'] = tweet['user']['screen_name']
                caliper_tweet['object']['author']['author_name'] = tweet['user']['name']
                caliper_tweet['object']['text'] = unicode(tweet['text'])
                caliper_tweet['object']['sentiment']['type'] = tweet_sentiment_type
                caliper_tweet['object']['sentiment']['score'] = tweet_sentiment_score
                caliper_tweet['object']['sentiment']['color'] = tweet_sentiment_color
                # Collect hashtag texts and mentioned user ids from entities.
                for x in list(tweet['entities']['hashtags']):
                    hashtag = x['text']
                    hashtags_list.append(hashtag)
                for x in list(tweet['entities']['user_mentions']):
                    mention = x['id_str']
                    mentions_list.append(mention)
                caliper_tweet['object']['user_mentions'] = mentions_list
                caliper_tweet['object']['hashtags'] = hashtags_list
                tweets.insert(caliper_tweet)
                db_inserts = db_inserts + 1
            else:
                # Requested insert count reached; stop iterating.
                raise StopIteration
    except StopIteration:
        print str(db_inserts) + " inserts made in the " + keyword + " collection."
default=False, help='show extra output') args = parser.parse_args() # create a TwitterUserOrder using the command line arg as the query tuo = TwitterUserOrder(args.username) # start an index for counting the processed tweets index = 0 # start a summary of sentiment scores for later averaging sum_sentiment = 0 try: # ask Twitter for the timeline for tweet in islice(ts.search_tweets_iterable(tuo), 0, args.limit): index = index + 1 # scrub usernames, special characters and URLs from tweet cleanTweet = re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", tweet['text']) # sentiment analysis using TextBlob analysis = TextBlob(cleanTweet) # define human friendly sentiment scores if analysis.sentiment.polarity > 0: score = positive elif analysis.sentiment.polarity == 0: score = neutral
from TwitterSearch import TwitterSearchOrder, TwitterSearch, TwitterSearchException
from CREDS import *

# Authenticated client built from the credentials module.
ts = TwitterSearch(
    consumer_key=TWITTER_CONSUMER_KEY,
    consumer_secret=TWITTER_CONSUMER_SECRET,
    access_token=TWITTER_ACCESS_TOKEN,
    access_token_secret=TWITTER_ACCESS_TOKEN_SECRET,
)

try:
    # Match tweets mentioning either survey platform.
    tso = TwitterSearchOrder()
    tso.set_keywords(['surveymonkey', 'docs.google.com/forms'], or_operator=True)
    for tweet in ts.search_tweets_iterable(tso):
        print('@%s tweeted: %s' % (tweet['user']['screen_name'], tweet['text']))
except TwitterSearchException as e:
    # Report API/auth/rate-limit failures.
    print(e)
from TwitterSearch import TwitterSearchOrder, TwitterSearch, TwitterSearchException
from CREDS import *

# Client authenticated with the tokens exported by CREDS.
ts = TwitterSearch(
    consumer_key=TWITTER_CONSUMER_KEY,
    consumer_secret=TWITTER_CONSUMER_SECRET,
    access_token=TWITTER_ACCESS_TOKEN,
    access_token_secret=TWITTER_ACCESS_TOKEN_SECRET,
)

try:
    tso = TwitterSearchOrder()
    # OR-search: either keyword is enough for a match.
    tso.set_keywords(['surveymonkey', 'docs.google.com/forms'], or_operator=True)
    for tweet in ts.search_tweets_iterable(tso):
        author = tweet['user']['screen_name']
        body = tweet['text']
        print('@%s tweeted: %s' % (author, body))
except TwitterSearchException as e:
    print(e)
place = raw_input("Enter a twitter handle: ") tuo = TwitterUserOrder(place) # create a TwitterUserOrder # it's about time to create TwitterSearch object again ts = TwitterSearch( consumer_key='jP53etLOQHrdCtMc4j2Djas2z', consumer_secret='9UmpzmT1IPF6JuNzODHOyXZU19Vv1C0eYOQraQLwY04jAMGpu4', access_token='746046118652416000-BZC8oHZZ75dJe8Q8fGlMigNvKy6kVwK', access_token_secret='Nfl6UpuUUdvSy60tN6p7l3l1W0GOGKpQoIbqZg78cdrtd') def my_callback_closure( current_ts_instance ): # accepts ONE argument: an instance of TwitterSearch queries, tweets_seen = current_ts_instance.get_statistics() # if queries > 0 and (queries % 60) == 0: # trigger delay every 5th query # time.sleep(30) # sleep for 60 seconds tweetArray = [] # start asking Twitter about the timeline for tweet in ts.search_tweets_iterable(tuo, callback=my_callback_closure): # tweetArray.append(tweet['text']) # if 'accessible' in tweet['text']: print tweet['text'] print( json.dumps(alchemy_language.emotion(text=tweet['text'], language='english'), indent=2)) except TwitterSearchException as e: # catch all those ugly errors print(e)
def Tweets():
    """Collect recent tweets per company — a hashtag search plus the
    company's own timeline — into per-company DataFrames.

    Reads the module-level MainDF (assumes 'company', 'twitter' and
    'hashtag' columns — TODO confirm), writes DataFrames into the
    module-level `tweets_datasets` dict and appends processed company names
    to `tw_current_companies`.  Errors from the Twitter library are printed.
    """
    try:
        max_feeds = 10  # cap on timeline tweets kept per company
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.set_language('en')
        tso.set_include_entities(
            False)  # and don't give us all those entity information
        tso.set_until(new_date)  # only tweets up to this module-level date
        # Ask the API for full-length (non-truncated) tweet bodies.
        tso.arguments.update({'tweet_mode': 'extended'})
        tso.arguments.update({'truncated': 'False'})
        # SECURITY NOTE(review): hard-coded credentials — move to config.
        ts = TwitterSearch(
            consumer_key='DMHjSht5U0UqNUsAWpZH9DXok',
            consumer_secret=
            'olCjsx8LltiHxEiPHWafExoibDuu4eZT48udXTeSYcQbLQ3juB',
            access_token='1170976252213125121-ftEg9MzF9siFHUmcUkV6zzT7mQV9Db',
            access_token_secret='eNA62T8Ig40Iz1wmKf6baDGHqY3Wh9kxzu9oaOQdGE9h8',
        )
        for c in range(len(MainDF)):
            count = 0
            #kw=[MainDF['twitter'][c]]
            #for h in MainDF['hashtag'][c]:
            #    kw.append(h)
            tso.set_keywords(MainDF['hashtag'][c])
            tweets_list = []
            # Separate order for the company's own timeline.
            tuo = TwitterUserOrder(MainDF['twitter'][c])
            # tuo.set_language('en')
            tuo.set_include_entities(
                False)  # and don't give us all those entity information
            # tuo.set_until(days_ago)
            # tuo.set_count(15)
            tuo.arguments.update({'tweet_mode': 'extended'})
            tuo.arguments.update({'truncated': 'False'})
            # Hashtag search: retweets are skipped, originals collected.
            for tweet in ts.search_tweets_iterable(tso):
                if 'retweeted_status' in tweet:
                    None
                    #tweets_list.append([tweet['user']['screen_name'],tweet['retweeted_status']['full_text'],'Retweet of ' + tweet['retweeted_status']['user']['screen_name']])
                else:
                    # Extract any URLs from the tweet body.
                    links = Find(tweet['full_text'])
                    links = ', '.join(link for link in links)
                    #print(tweet)
                    tweets_list.append([
                        MainDF['company'][c], tweet['user']['screen_name'],
                        tweet['full_text'], tweet['created_at'], links
                    ])
            # Timeline: keep English tweets only, up to max_feeds of them.
            for tweet in ts.search_tweets_iterable(tuo):
                if tweet['lang'] != 'en':
                    #print(tweet)
                    None
                else:
                    # print(tweet)
                    links = Find(tweet['full_text'])
                    links = ', '.join(link for link in links)
                    tweets_list.append([
                        MainDF['company'][c], tweet['user']['screen_name'],
                        tweet['full_text'], tweet['created_at'], links
                    ])
                    count = count + 1
                    if count == max_feeds:
                        break
            if tweets_list != []:
                # Wrap this company's rows in a DataFrame and label it.
                tweets_datasets[MainDF['company'][c]] = pd.DataFrame(
                    tweets_list)
                tweets_datasets[MainDF['company'][c]].columns = [
                    'Company', 'Source/User', 'Title/Tweet', 'Date', 'Link'
                ]
                tweets_datasets[MainDF['company'][c]].insert(
                    0, 'Category', 'Twitter')
                # Normalise the created_at strings to plain date objects.
                for i in range(
                        len(tweets_datasets[MainDF['company'][c]]['Date'])):
                    tweets_datasets[MainDF['company'][c]]['Date'][i] = parse(
                        tweets_datasets[MainDF['company'][c]]['Date'][i])
                    tweets_datasets[
                        MainDF['company'][c]]['Date'][i] = tweets_datasets[
                            MainDF['company'][c]]['Date'][i].date()
                #print(datasets[companies_names[count]])
                tw_current_companies.append(MainDF['company'][c])
            else:
                None
                #tweets_list.append()
        #print( '@%s tweeted: %s' % ( tweet['user']['screen_name'], tweet['text'] ) )
    except TwitterSearchException as e:
        # take care of all those ugly errors if there are some
        print(e)