Example #1
def test_twitter_search(keywords):
    """Test using the twitter client to search for keywords.

    Args:
        keywords (list): Keywords to search

    Returns:
        list: serializer dumps of all results returned by Twitter

    """
    if not provider.client:
        provider.client = provider.instantiate_client()

    provider._add_validator_context(  # test values
        uri_id=1111111111111111,
        origin=1,
        provider=1,
        scrape_id=1)

    tw_search_order = TwitterSearchOrder()
    tw_search_order.set_keywords(keywords)

    results = provider.client.search_tweets_iterable(tw_search_order)

    return provider._validate(results)
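A hypothetical invocation of the test helper above, assuming the module-level `provider` is configured with valid Twitter credentials (the keyword is a made-up sample):

results = test_twitter_search(['#altmetrics'])
print(len(results), 'validated results')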
Example #2
def coleta_tweets():

    try:
    
        ts = TwitterSearch(
            consumer_key = '',
            consumer_secret = '',
            access_token = '',
            access_token_secret = ''
        )
    
        tso = TwitterSearchOrder()
        tso.set_keywords(['Harry potter'])
        tso.set_language('pt')
        df = []
        for tweet in ts.search_tweets_iterable(tso):
            df.append('@%s tweeted: %s' % ( tweet['user']['screen_name'], tweet['text'])+',')
            #print( '@%s tweeted: %s' % ( tweet['user']['screen_name'], tweet['text']) )
        print('Coleta finalizada!')
        
        df = pd.DataFrame(df)
        #df.to_csv('tweets.txt')
        #print('Arquivo salvo.')
        return df
    except TwitterSearchException as e:
        print(e)
Example #3
 def __init__(self):
     self.properties = TwitterSearchOrder()
     self.properties.set_keywords([''])
     self.properties.set_count(100)
     self.properties.set_include_entities(False)
     self.api = TwitterSearch(
         consumer_key=DATASOURCES['twitter']['api_key'],
         consumer_secret=DATASOURCES['twitter']['api_secret'],
         access_token=DATASOURCES['twitter']['access_token'],
         access_token_secret=DATASOURCES['twitter']['access_token_secret'])
Example #4
File: loops.py Project: tinanimo/L8pR
 def get_tweets(query):
     from TwitterSearch import TwitterSearch, TwitterSearchOrder
     import itertools
     tso = TwitterSearchOrder()
     tso.set_keywords(query.get('query', '').split(' '))
     # tso.set_language('en')
     tso.set_include_entities(False)
     ts = TwitterSearch(
         consumer_key=app.config.get('TWITTER_CONSUMER_KEY'),
         consumer_secret=app.config.get('TWITTER_CONSUMER_SECRET'),
         access_token=app.config.get('TWITTER_ACCESS_TOKEN'),
         access_token_secret=app.config.get('TWITTER_ACCESS_TOKEN_SECRET')
     )
     return list(itertools.islice(ts.search_tweets_iterable(tso), 0, int(query.get('count', 5))))
Example #5
def SearchOnTwitter(keywords, language):
    """
    Test the TwitterSearch library by printing tweets of interest.
        Parameters:
            - keywords : list of strings that the tweets must contain
            - language : string indicating the language of the tweets of interest
        Return :
            - array of tweets
    """
    tweets = []
    try:
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.set_keywords(
            keywords
        )  # let's define all words we would like to have a look for
        tso.set_language(language)  # restrict results to the requested language
        tso.set_include_entities(
            False)  # and don't give us all those entity information

        # it's about time to create a TwitterSearch object with our secret tokens
        ts = TwitterSearch(consumer_key=consumer_key,
                           consumer_secret=consumer_secret,
                           access_token=access_token,
                           access_token_secret=access_token_secret)

        # this is where the fun actually starts :)
        for tweet in ts.search_tweets_iterable(tso):
            tweets.append(tweet['text'])

    except TwitterSearchException as e:  # take care of all those ugly errors if there are some
        print(e)

    return tweets
Example #6
def getTweetsByWords(authdata, word, limit=100):
    tso = TwitterSearchOrder()
    tso.set_keywords([word])
    tso.set_include_entities(False)
    ts = TwitterSearch(consumer_key=authdata['consumer_key'],
                       consumer_secret=authdata['consumer_secret'],
                       access_token=authdata['access_token'],
                       access_token_secret=authdata['access_token_secret'])
    result = []
    c = 0
    for tweet in ts.search_tweets_iterable(tso):
        if c == limit:
            break
        result.append(tweet['text'])
        print(c)
        c+=1
    return {'status': 'Task Completed', 'result': result}
Example #7
    def init_tw_search_lib(self, domain_keyword):
        """
        Init TwitterSearch Library 
        (Copyright (C) 2013 Christian Koepp
        https://github.com/ckoepp/TwitterSearch/tree/master)

        
        Arguments:
            domain_keyword {str} -- The keyword from <domain_keywords_dict> 
                                    that will be used to search in Twitter
        
        Returns:
            [TwitterSearch] -- TwitterSearch object with our secret tokens
            [TwitterSearchOrder] -- TwitterSearchOrder object with initialized attributes
        """

        try:
            tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
            tso.add_keyword(
                domain_keyword)  # add keyword for search in Twitter
            tso.set_language('en')  # we want to see English tweets only
            tso.set_include_entities(
                False)  # and don't give us all those entity information

            # it's about time to create a TwitterSearch object with our secret tokens
            ts = TwitterSearch(
                consumer_key='<your-CONSUMER_KEY>',
                consumer_secret='<your-CONSUMER_SECRET>',
                access_token='<your-ACCESS_TOKEN>',
                access_token_secret='<your-ACCESS_TOKEN_SECRET>')

        except TwitterSearchException as e:  # take care of all those ugly errors if there are some
            print(e)
            raise  # without re-raising, the return below would reference unbound names

        return ts, tso
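A minimal usage sketch for the method above (hypothetical; `crawler` is assumed to be an instance of the surrounding class, and 'bitcoin' a sample domain keyword):

ts, tso = crawler.init_tw_search_lib('bitcoin')
for tweet in ts.search_tweets_iterable(tso):
    print(tweet['text'])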
Example #8
def count_tweets_of_app(app_name):
    """
		Counts how many tweets are with the hashtag app_name and COMPETITION_NAME from diferent users

		Args:
			app_name:	name of the app of whose tweets are to be counted

		Returns:
			num of votes (tweets)
	"""

    from TwitterSearch import TwitterSearchOrder, TwitterSearch, TwitterSearchException
    try:
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.set_keywords([
            check_hashtag(app_name), COMPETITION_NAME
        ])  # let's define all words we would like to have a look for

        # it's about time to create a TwitterSearch object with our secret tokens
        ts = TwitterSearch(consumer_key=TWITTER_API_KEY,
                           consumer_secret=TWITTER_API_KEY_SECRET,
                           access_token=TWITTER_ACCESS_TOKEN,
                           access_token_secret=TWITTER_ACCESS_TOKEN_SECRET)

        # this is where the fun actually starts :)
        users = []
        #count = 0

        for tweet in ts.search_tweets_iterable(tso):

            user = tweet['user']['id']

            #Check if tweet if from the same user
            if user not in users:
                #more info https://dev.twitter.com/overview/api/tweets
                time_tweet = datetime.datetime.strptime(
                    tweet['created_at'], '%a %b %d %H:%M:%S +0000 %Y')

                if COMPETITION_START_DATE < time_tweet < COMPETITION_END_DATE:
                    users.append(user)
                    #count += 1 + tweet["retweet_count"]
                    #print( '@%s tweeted: %s' % ( tweet['user']['screen_name'], tweet['text'] ) )

        return len(users)

    except TwitterSearchException as e:  # take care of all those ugly errors if there are some
        print(e)
        return -1
Example #9
def hello_world(keywords):
    response = make_response()
    response.headers.add("Access-Control-Allow-Origin", "*")
    response.headers.add("Access-Control-Allow-Headers", "*")
    response.headers.add("Access-Control-Allow-Methods", "*")
    tweets = []  # initialise first so the response can be built even if the search fails
    try:
        tso = TwitterSearchOrder()
        tso.set_keywords([keywords])
        ts = TwitterSearch(**keys)  # `keys` is assumed to be a dict holding the four OAuth credentials
        for tweet in ts.search_tweets_iterable(tso):
            tweets.append(tweet['text'])
    except TwitterSearchException as e:
        print(e)
    response = jsonify({'status': 200, 'results': tweets})
    return response
Example #10
def configureSearch(id_tweet):
    print("ConfigureSearch: " + str(id_tweet))
    now = datetime.datetime.now()
    datefortweet = datetime.date(now.year, now.month, now.day)

    twSOrder = TwitterSearchOrder()  # create a TwitterSearchOrder object
    #twSOrder.set_keywords(['from:YodaBotter', 'to:YodaBotter'], or_operator = True)
    twSOrder.add_keyword("#AWSNinja")
    #twSOrder.set_language('en') # we want to see English tweets only
    twSOrder.set_include_entities(True)  # and get all the entities incl. Media
    print("Search: " + twSOrder.create_search_url())
    twSOrder.set_since(datefortweet)

    return twSOrder
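A sketch of how the returned search order might be consumed (hypothetical; assumes `ts` is an authenticated TwitterSearch instance and the tweet ID is a sample value):

twSOrder = configureSearch(1234567890)
for tweet in ts.search_tweets_iterable(twSOrder):
    print(tweet['id'], tweet['text'])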
Example #11
    def post(self):
        query_string = self.request.body_arguments.get('query')
        query = TwitterSearchOrder()
        query.set_keywords(query_string)
        query.set_language('en')
        query.set_include_entities(False)
        results = TwitterClient.search_tweets(query)

        tweets = [tweet['text'] for tweet in results['content']['statuses']]
        tweet_results = indicoio.batch_text_tags(tweets)
        n_tweets = float(len(tweet_results))
        
        scores = defaultdict(float)
        for tweet in tweet_results:
            for category, score in tweet.items():
                scores[category] += score / n_tweets

        category = max(scores, key=lambda x: scores[x])

        data = {
            'scores': scores,
            'category': category
        }
        
        self.write(json.dumps(data))
Example #12
File: __init__.py Project: rybo449/tlc
 def __init__(self):
     self.properties = TwitterSearchOrder()
     self.properties.set_keywords([''])
     self.properties.set_count(100)
     self.properties.set_include_entities(False)
     self.api = TwitterSearch(consumer_key=DATASOURCES['twitter']['api_key'],
                              consumer_secret=DATASOURCES['twitter']['api_secret'],
                              access_token=DATASOURCES['twitter']['access_token'],
                              access_token_secret=DATASOURCES['twitter']['access_token_secret'])
Example #13
def count_for_tag(support, hashtag):
    print "Searching for %s " % hashtag
    try:
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.setKeywords([hashtag])  # let's define all words we would like to have a look for
        # tso.setLanguage('en')
        tso.setCount(100)  # please dear Mr Twitter, give us 100 results per page
        tso.setIncludeEntities(False)  # and don't give us all those entity information

        # it's about time to create a TwitterSearch object with our secret tokens
        ts = TwitterSearch(
            consumer_key="<CONSUMER_KEY>",  # credentials redacted
            consumer_secret="<CONSUMER_SECRET>",
            access_token="<ACCESS_TOKEN>",
            access_token_secret="<ACCESS_TOKEN_SECRET>",
            verify=False,
        )

        for tweet in ts.searchTweetsIterable(tso):
            # print tweet['coordinates']
            if tweet["place"] != None and tweet["place"].has_key("country"):
                country = tweet["place"]["country"]
                __increment(support, country)
                continue
            location = tweet["user"]["location"]
            if len(location) == 0:
                continue
            # try:
            # results = Geocoder.geocode(location)
            #                country = results[0].country
            #                increment(support, country)
            #                continue
            #            except GeocoderError as e:
            #                #print "Could not parse ", location
            #                pass
            country = None
            for cn, cd in country_desc.iteritems():
                if cn.lower() in location.lower():
                    country = cn
                    break
                for desc_part in cd:
                    desc_word = re.compile(r"\b%s\b" % desc_part)
                    if desc_word.search(location):
                        country = cn
                        break
                if country is not None:
                    break
            if country is None:
                pass
                # print( '%s' % location)
            else:
                # print ("Found %s in \"%s\"" % (country, location))
                __increment(support, country)
            if ts.getStatistics()["tweets"] > 1000:
                break

    except TwitterSearchException as e:  # take care of all those ugly errors if there are some
        print (e)
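Note: examples such as this one use the camelCase setters (setKeywords, setCount, searchTweetsIterable) from older TwitterSearch releases; newer releases renamed them to the snake_case forms (set_keywords, set_count, search_tweets_iterable) seen in the other examples.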
Example #14
    def getStats(self, url, proxy, headers, timeout):
        """Return the combined retweet + favorite count for tweets mentioning a URL.

        url     -- URL that could appear in a tweet
        proxy   -- proxy as an 'ip:port' string
        headers -- dictionary that should contain a user-agent entry
        timeout -- maximum time in seconds (int) to wait for a response
        """
        count = 0

        tso = TwitterSearchOrder()
        tso.set_search_url('q=' + url)
        tso.set_result_type(result_type='mixed')
        tso.set_include_entities(False)
        tso.set_count(100)

        ts = TwitterSearch(consumer_key=self.ConsumerKey,
                           consumer_secret=self.ConsumerSecret,
                           access_token=self.AccessTokenKey,
                           access_token_secret=self.AccessTokenSecret,
                           proxy=proxy)

        for tweet in ts.search_tweets_iterable(tso):
            count += tweet['retweet_count'] + tweet['favorite_count']

        return count
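A hypothetical call, assuming `checker` is an instance of the surrounding class with its four credential attributes set (URL, proxy, and header values are sample data):

count = checker.getStats('http://example.com/article', '127.0.0.1:8080', {'User-Agent': 'Mozilla/5.0'}, 30)
print('retweets + favorites:', count)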
Example #15
def coleta_tweets():

    try:

        ts = TwitterSearch(consumer_key='',
                           consumer_secret='',
                           access_token='',
                           access_token_secret='')

        tso = TwitterSearchOrder()
        tso.set_keywords(['Harry potter'])
        tso.set_language('pt')
        df = []
        for tweet in ts.search_tweets_iterable(tso):
            df.append('@%s tweeted: %s' %
                      (tweet['user']['screen_name'], tweet['text']) + ',')
            #print( '@%s tweeted: %s' % ( tweet['user']['screen_name'], tweet['text']) )
        print('Coleta finalizada!')

        df = pd.DataFrame(df)
        #df.to_csv('tweets.txt')
        #print('Arquivo salvo.')
        return df
    except TwitterSearchException as e:
        print(e)
Example #16
def rq_tweets(game):
    """
    Request tweet metadata according to a game using the Twitter API
    """
    search = TwitterSearchOrder()
    search.set_keywords(game.c_name.split())
    search.set_language('en')

    try:
        for tweet_json in API.search_tweets_iterable(search):

            # Unit filtering
            if not validate_tweet(tweet_json):
                continue

            # Relevancy filtering
            if not relevant_tweet(game, tweet_json):
                continue

            # Remove unwanted information
            tweet_json = {
                'id': tweet_json['id'],
                'text': tweet_json['text'],
                'user': {
                    'name': tweet_json['user']['name']
                },
                'created_at': tweet_json['created_at']
            }

            # Finally add the tweet
            TC['Tweet.game_id'].add(
                CachedTweet(game_id=game.game_id, twitter_data=tweet_json))
    except TwitterSearchException:
        TC['Tweet.game_id'].flush()
        reload_api()
Example #17
File: __init__.py Project: rybo449/tlc
class TwitterService(object):
    def __init__(self):
        self.properties = TwitterSearchOrder()
        self.properties.set_keywords([''])
        self.properties.set_count(100)
        self.properties.set_include_entities(False)
        self.api = TwitterSearch(consumer_key=DATASOURCES['twitter']['api_key'],
                                 consumer_secret=DATASOURCES['twitter']['api_secret'],
                                 access_token=DATASOURCES['twitter']['access_token'],
                                 access_token_secret=DATASOURCES['twitter']['access_token_secret'])

    def get_tweets(self, lat, lng):
        self.properties.set_geocode(float(lat), float(lng), 1)
        response = self.api.search_tweets(self.properties)
        return response
Example #18
class TwitterService(object):
    def __init__(self):
        self.properties = TwitterSearchOrder()
        self.properties.set_keywords([''])
        self.properties.set_count(100)
        self.properties.set_include_entities(False)
        self.api = TwitterSearch(
            consumer_key=DATASOURCES['twitter']['api_key'],
            consumer_secret=DATASOURCES['twitter']['api_secret'],
            access_token=DATASOURCES['twitter']['access_token'],
            access_token_secret=DATASOURCES['twitter']['access_token_secret'])

    def get_tweets(self, lat, lng):
        self.properties.set_geocode(float(lat), float(lng), 1)
        response = self.api.search_tweets(self.properties)
        return response
Example #19
    def process(self, uri, scrape, last_check, task):
        """ Implement processing of a URI to get Twitter events.

        Args:
            uri (Uri): An Uri object.
            scrape (Scrape): Scrape from ORM, not saved to database (yet).
            last_check (datetime): when this uri was last successfully scraped.
            task (object): Celery task running this plugin.

        Returns:
            dict: new Event objects.
        """

        self.assess_timeout(task)

        if not self.client:
            self.client = self.instantiate_client()

        self._add_validator_context(uri_id=uri.id,
                                    origin=self.origin.value,
                                    provider=self.provider.value,
                                    scrape_id=scrape.id)

        tw_search_order = TwitterSearchOrder()
        '''
        What needs to be done here would be along the lines of

        keywords = [f'"url:{url.url}"' for url in uri.urls] + [f'"{uri.raw}"']
        for keyword in keywords:
            # as a new task
            tw_search_order.set_keywords([keyword])
            # then run search and process results as currently done for the
            # 'just doi' search shown below.
        '''

        tw_search_order.set_keywords([f'"{uri.raw}"'])
        tw_search_order.set_include_entities(False)  # `True` for retweet info.

        if last_check:
            tw_search_order.set_since(last_check.date())

        results_generator = self.rate_limited_search(tw_search_order, task)

        event_data = self._validate(results_generator)

        events = self._build(
            event_data=event_data,
            uri_id=uri.id,
        )

        self.log_new_events(uri, self.origin, self.provider, events)
        return events
Example #20
def gettwitter(query):
    try:
        tso = TwitterSearchOrder()
        tso.set_language('en')
        tso.set_locale('en')
        tso.set_keywords([query])
        url = "https://twitter.com/search"+tso.create_search_url()
        print url
    except TwitterSearchException as e:
        print(e)
    html = getHtml(url)
    soup = BeautifulSoup(html)
    twits = soup.find_all("p",class_="TweetTextSize")
    twitters=[]
    for t in twits:
        dr = re.compile(r'<[^>]+>',re.S)
        replacedStr = dr.sub('',str(t))
        replacedStr = re.sub(r"([a-zA-z]+://\S*\s{0,1})", "url", replacedStr)
        twitters.append(replacedStr+"\n")
    return twitters
Example #21
    def post(self):
        query_string = self.request.body_arguments.get('query')
        query = TwitterSearchOrder()
        query.set_keywords(query_string)
        query.set_language('en')
        query.set_include_entities(False)
        results = TwitterClient.search_tweets(query)

        tweets = [tweet['text'] for tweet in results['content']['statuses']]
        tweet_results = indicoio.batch_text_tags(tweets)
        n_tweets = float(len(tweet_results))

        scores = defaultdict(float)
        for tweet in tweet_results:
            for category, score in tweet.items():
                scores[category] += score / n_tweets

        category = max(scores, key=lambda x: scores[x])

        data = {'scores': scores, 'category': category}

        self.write(json.dumps(data))
Example #22
    def post(self):
        query_string = self.request.body_arguments.get('query')
        query = TwitterSearchOrder()
        query.set_keywords(query_string)
        query.set_language('en')
        query.set_include_entities(False)
        results = TwitterClient.search_tweets(query)

        tweets = [tweet['text'] for tweet in results['content']['statuses']]
        sentiment = indicoio.batch_sentiment(tweets)
        pairs = sorted(zip(sentiment, tweets))
        n_tweets = float(len(pairs))

        top_n = 5
        most_negative = pairs[:top_n]
        most_positive = list(reversed(pairs[-top_n:]))

        data = {
            'most_positive': most_positive,
            'most_negative': most_negative,
            'average': sum(sentiment) / n_tweets
        }

        self.write(json.dumps(data))
Example #23
    def search(self, query, lang='en', n=10**5):
        """
        Search twitter for specified query.
        Function returns n tweets or as many as can be found for that query.

        Parameters:
        query -- Search query (String)
        lang -- Specify language of tweets, optional, default: 'en' (String)
        n -- Number of tweets to return, optional, default: 10**5 (Int)

        Returns: 
        tweets_out -- Pandas series of tweets of length n
        """
        # Initialise container
        tweets_out = []
        # Setup twitter search
        tso = TwitterSearchOrder()
        tso.set_keywords([query])
        tso.set_language(lang)
        tso.set_include_entities(False)

        # Begin search
        sys.stdout.write("Tweet number out of {0}: ".format(n))
        for i, tweet in enumerate(self.twitter.search_tweets_iterable(tso)):
            # Break from loop when n tweets are reached
            if i == n:
                break
            # Output progress
            if i % 100 == 0:
                sys.stdout.write('{0} '.format(i))
                sys.stdout.flush()
            # Add the next tweet to the container
            tweets_out.append('%s' % (tweet['text']))
        print()
        # Return as pandas series as it's easier to work with
        return pd.Series(tweets_out)
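A hypothetical call, assuming `miner` is an instance of the surrounding class with `self.twitter` already authenticated:

tweets = miner.search('python', lang='en', n=500)
print(tweets.head())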
Example #24
    def search(self):
        try:
            tso = TwitterSearchOrder()
            tso.set_keywords(self.search_terms)  # pass the list itself; unpacking it would feed extra terms into or_operator
            tso.set_include_entities(False)
            tso.set_count(100)

            ts = TwitterSearch(
                consumer_key='<CONSUMER_KEY>',  # credentials redacted
                consumer_secret='<CONSUMER_SECRET>',
                access_token='<ACCESS_TOKEN>',
                access_token_secret='<ACCESS_TOKEN_SECRET>'
            )

            for tweet in ts.search_tweets_iterable(tso):
                self.data.append(tweet['text'])
                self.save_line(tweet['text'])

            # self.save_data(self.data)
        except TwitterSearchException as exception:
            print(exception)
Example #25
    def post(self):
        query_string = self.request.body_arguments.get('query')
        query = TwitterSearchOrder()
        query.set_keywords(query_string)
        query.set_language('en')
        query.set_include_entities(False)
        results = TwitterClient.search_tweets(query)

        tweets = [tweet['text'] for tweet in results['content']['statuses']]
        sentiment = indicoio.batch_sentiment(tweets)
        pairs = sorted(zip(sentiment, tweets))
        n_tweets = float(len(pairs))

        top_n = 5
        most_negative = pairs[:top_n]
        most_positive = list(reversed(pairs[-top_n:]))

        data = {
            'most_positive': most_positive,
            'most_negative': most_negative,
            'average': sum(sentiment)/n_tweets
        }
        
        self.write(json.dumps(data))
Example #26
File: models.py Project: scotu/CityFeel
    def fetch_twitter_entries(self):
        origin = self
        max_id = origin.max_id
        since_id = None
        area = origin.area
        try:
            count = 50
            tso = TwitterSearchOrder() # create a TwitterSearchOrder object
            tso.setKeywords(['']) # let's define all words we would like to have a look for
            tso.setResultType('recent')
            if origin.max_id:
                tso.setMaxID(origin.max_id-1)  # as per twitter docs
            tso.setLanguage('en') # we want to see English tweets only
            tso.setGeocode(latitude=area.lat, longitude=area.long, radius=area.rad, km=True)
            tso.setCount(count) # request `count` results per page
            tso.setIncludeEntities(False) # and don't give us all those entity information

            # it's about time to create a TwitterSearch object with our secret tokens
            ts = TwitterSearch(
                **settings.TWITTER
            )
            total = 0
            for tweet in ts.searchTweetsIterable(tso): # this is where the fun actually starts :)

                max_id = save_tweets(area, origin, tweet)
                if not since_id:
                    since_id = max_id

                #import ipdb;ipdb.set_trace()
                #print( '@%s tweeted: %s' % ( tweet['user']['screen_name'], tweet['text'] ) )

                total += 1

                if total >= 50:
                    break

        except TwitterSearchException as e: # take care of all those ugly errors if there are some
            print(e)
        # except:
        #     pass
        finally:
            origin.max_id = max_id
            origin.save()
Example #27
def collect_tweets(keyword, count, force=False):
    from TwitterSearch import TwitterSearch
    from TwitterSearch import TwitterSearchOrder
    import pymongo
    from dateutil.parser import parse
    from alchemyapi import AlchemyAPI
    import ConfigParser
    
    # try:
    #     keyword = sys.argv[1]
    #     count = int(sys.argv[2])
    # except IndexError:
    # 	e_too_few_args = "You did not enter enough arguments. Two are required: keyword, and count"
    # 	raise Exception(e_too_few_args)
    # try:
    #     if sys.argv[3] == '-f':
    #         force = True
    #     else:
    #         e_invalid_argument = "The only option available is -f. It is used to force the script to continue when the Alchemy API limit is exceeded."
    #         raise Exception(e_invalid_argument)    
    # except IndexError:
    #     force = False
    
    # Read the config file for config variables
    config = ConfigParser.RawConfigParser()
    config.read('config.cfg')
    mongo_url = config.get('Mongo', 'db_url')
    
    # Connect to the Mongo database using MongoClient
    
    client = pymongo.MongoClient(mongo_url)
    db = client.get_default_database()
    # Access/create the collection based on the command line argument
    tweets = db[keyword]
    
    #Generate the alchemyapi variable
    alchemyapi = AlchemyAPI()
    
    # To accommodate hashtags, the user can substitute a . for the # on the
    # command line; the block below turns it back into a hashtag for the search.
    if keyword[0] == ".":
        keyword = keyword.replace('.', '#')
    
    # The block below ensures that the query is not doing duplicate work.
    # First, it counts to see how many documents exist in the collection
    db_count = tweets.count()
    
    # If there are documents in the collection, the collection is queried, tweet objects are sorted by date, and the tweet_id of the most recent tweet is retrieved and later set as the "since_id"
    if db_count != 0:
        latest_id = tweets.find( {}, { 'object.tweet_id':1 } ).sort("startedAtTime").limit(1)
        latest_id_str = latest_id[db_count-1]['object']['tweet_id']
        latest_id_int = int(latest_id_str)
        print 'Count of documents in the ' + keyword + ' collection is not 0. It is ' + str(db_count) + '. Mongo is now identifying the latest tweet ID to append as a parameter to the API call.'
    # If there are no documents in the collection, no queries are done, and the since_id is left out of the API call.
    else:
        print 'The Mongo collection ' + keyword + ' is empty. The script will now collect all tweets.'
        
    # create a TwitterSearchOrder object
    tso = TwitterSearchOrder() 
    
    # let's define all words we would like to have a look for
    tso.set_keywords([keyword])
    
    # Select language
    tso.set_language('en') 
    
    # Include Entity information
    tso.set_include_entities(True)
    
    if db_count != 0:
        tso.set_since_id(latest_id_int)
        print 'Since the document count in the ' + keyword + ' collection is above 0, the since_id uses the parameter of the latest tweet so that only new tweets are collected.'
    else:
        print 'No documents exist in the ' + keyword + ' collection right now so the since_id parameter will be empty and all tweets will be collected.'
    
        
    # Create a TwitterSearch object with our secret tokens
    ts = TwitterSearch(
        consumer_key = config.get('Twitter', 'consumer_key'),
        consumer_secret = config.get('Twitter', 'consumer_secret'),
        access_token = config.get('Twitter', 'access_token'),
        access_token_secret = config.get('Twitter', 'access_token_secret')
     )
     
    # Perform the search
    twitter_search = ts.search_tweets_iterable(tso)

    # Start the insert count variable
    db_inserts = 0
    
    # this is where the fun actually starts :)
    try:
        for tweet in twitter_search:
            if db_inserts < count:
                mentions_list = []
                hashtags_list = []
                # Create the caliper_tweet object
                caliper_tweet = {
                    "context": "http://purl.imsglobal.org/ctx/caliper/v1/MessagingEvent",
                    "type": "MessagingEvent",
                    "startedAtTime": "",
                    ## Can be used to query Twitter API for user information
                    "actor": "",
                    "verb": "tweetSent",
                    "object": {
                        "type": "MessagingEvent",
                        "tweet_id": "",
                        "tweet_uri": "",
                        "subtype": "tweet",
                        ## "to" should be calculated by checking in_reply_to_user_id_str is null. If it's not null, then it should be concatenated to "uri:twitter/user/" and stored in "object"['to']
                        "to": "",
                        "author": {
                            "author_uri": "",
                            "author_alias": "",
                            "author_name": "",
                        },
                        "text": "",
                        "sentiment": {
                            "type": "",
                            "score": "",
                            "color": ""
                        },
                        "parent": "",
                        ## "mentions" is an array of the caliper IDs from the user_mentions objects array
                        "user_mentions": [],
                        ## "hashtags" is an array of the hashtag texts included in the tweet entities
                        "hashtags": []
                    }
                }
                
                # Set the re-usable variables
                tweet_text = tweet['text']
                
                ## AlchemyAPI Sentiment Analysis
                tweet_sentiment = ''
                response = alchemyapi.sentiment('text', tweet_text)
                if 'docSentiment' in response.keys():
                    if 'score' in response['docSentiment']:
                        tweet_sentiment_score = response['docSentiment']['score']
                        tweet_sentiment_score = float(tweet_sentiment_score)
                        tweet_sentiment_score = round(tweet_sentiment_score, 2)
                    else:
                        tweet_sentiment_score = 0
                    tweet_sentiment_type = response['docSentiment']['type']
                    tweet_sentiment_score_a = abs(tweet_sentiment_score)
                    if (tweet_sentiment_score) > 0:
                        tweet_sentiment_color = "rgba(0,255,0," + str(tweet_sentiment_score_a) + ")"
                    else: 
                        tweet_sentiment_color = "rgba(255,0,0," + str(tweet_sentiment_score_a) + ")"
                elif force:
                    print 'Force option set to true. The tweet_sentiment object will be set with API Limit Exceeded values.'
                    tweet_sentiment_type = 'API Limit Exceeded'
                    tweet_sentiment_score = 0
                    tweet_sentiment_color = 'rgba(0,0,0,0)'
                else:
                    e_alchemy_api_limit = 'Alchemy API daily limit exceeded. Retry search with force=True to continue'
                    raise Exception(e_alchemy_api_limit)
                    
            
                ds = tweet['created_at']
                tweet_date = parse(ds)
                caliper_tweet['startedAtTime'] = tweet_date
                caliper_tweet['actor'] = 'student:' + tweet['user']['screen_name']
                caliper_tweet['object']['tweet_uri'] = 'https://twitter.com/' + tweet['user']['screen_name'] + '/status/' + tweet['id_str']
                caliper_tweet['object']['tweet_id'] = tweet['id_str']
                if tweet['in_reply_to_user_id_str'] is None:
                    caliper_tweet['object']['to'] = 'NoReply'
                    caliper_tweet['object']['parent'] = 'NoReply'
                else:
                    caliper_tweet['object']['to'] = 'https://twitter.com/intent/user?user_id=' + tweet['in_reply_to_user_id_str']
                    if tweet['in_reply_to_status_id_str'] is None:
                        caliper_tweet['object']['parent'] = 'None'
                    else:    
                        caliper_tweet['object']['parent'] = 'https://twitter.com/' + tweet['user']['screen_name'] + '/status/' + tweet['in_reply_to_status_id_str']
                caliper_tweet['object']['author']['author_uri'] = 'https://twitter.com/intent/user?user_id=' + tweet['user']['id_str']
                caliper_tweet['object']['author']['author_alias'] = tweet['user']['screen_name']
                caliper_tweet['object']['author']['author_name'] = tweet['user']['name']
                caliper_tweet['object']['text'] = unicode(tweet['text'])
                caliper_tweet['object']['sentiment']['type'] = tweet_sentiment_type
                caliper_tweet['object']['sentiment']['score'] = tweet_sentiment_score
                caliper_tweet['object']['sentiment']['color'] = tweet_sentiment_color
                for x in list(tweet['entities']['hashtags']):
                    hashtag = x['text']
                    hashtags_list.append(hashtag)
                for x in list(tweet['entities']['user_mentions']):
                    mention = x['id_str']
                    mentions_list.append(mention)
                caliper_tweet['object']['user_mentions'] = mentions_list
                caliper_tweet['object']['hashtags'] = hashtags_list
             
                tweets.insert(caliper_tweet)
                
                db_inserts = db_inserts + 1
                
            else:
                raise StopIteration
    except StopIteration:
        print str(db_inserts) + " inserts made in the " + keyword + " collection."
Example #28
    # We are starting a new import session, so let's begin by writing a session
    # state file with the currently newest tweet ID.
    document_ids = tuple(filter(lambda id_: not id_.startswith('_'), database))
    SESSION_STATE = {
        'previously_newest_tweet': max(document_ids),
        'session_oldest_tweet': None
    }
    SESSION_STATE_FILE.write_text(json.dumps(SESSION_STATE))
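    # Note: SESSION_STATE_FILE is assumed elsewhere in this script to be a
    # pathlib.Path, which is what provides the write_text() method used above.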

# The twitter client may stop iterating the tweets at some point.
# In order to automatically continue at the last position, we put the
# import in a "while"-loop which will be stopped when there are no new
# tweets to import.
while True:
    # First, let's build a search query:
    twitter_query = TwitterSearchOrder()
    twitter_query.set_keywords(TWITTER_SEARCH_KEYWORDS)
    # Only import english tweets as our sentiment analysis will only work
    # with the English language for now.
    twitter_query.set_language('en')
    # We do not require entities (e.g. extracted URLs) as we are only
    # interested in the raw text of the tweet.
    twitter_query.set_include_entities(False)

    # Use the session_oldest_tweet as max_id in the twitter query.
    if SESSION_STATE['session_oldest_tweet']:
        twitter_query.set_max_id(int(SESSION_STATE['session_oldest_tweet']))
        print('Updating tweets older than {}'.format(
            SESSION_STATE['session_oldest_tweet']))
    else:
        print('Start new update session.')
Example #29
###########
# get twitter credentials

with open(path_credentials, 'r') as json_file:
    data = json.load(json_file)

    CONSUMER_KEY = data['CONSUMER_KEY']
    CONSUMER_SECRET = data['CONSUMER_SECRET']
    ACCESS_TOKEN = data['ACCESS_TOKEN']
    ACCESS_SECRET = data['ACCESS_SECRET']

###########
# get the tweets and put in the list list_twitter_dict_responses

try:
    tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
    tso.set_keywords(['telemedicina'])

    ts = TwitterSearch(  # create a TwitterSearch object
        consumer_key=CONSUMER_KEY,
        consumer_secret=CONSUMER_SECRET,
        access_token=ACCESS_TOKEN,
        access_token_secret=ACCESS_SECRET)

    # insert the results in a list of dictionaries

    list_twitter_dict_responses = []
    for tweet in ts.search_tweets_iterable(tso):
        list_twitter_dict_responses.append({
            'user': tweet['user'],
            'datetime': tweet['created_at'],
Example #30
import os
import re
import string
from nltk.stem import WordNetLemmatizer, SnowballStemmer
from TwitterSearch import TwitterSearch, TwitterSearchOrder  # needed for the objects created below
import configparser

Config = configparser.ConfigParser()
Config.read('config.ini')

CONSUMER_KEY = Config.get('Tokens', 'CONSUMER_KEY')
ACCESS_TOKEN = Config.get('Tokens', 'ACCESS_TOKEN')
ACCESS_SECRET = Config.get('Tokens', 'ACCESS_SECRET')
CONSUMER_SECRET = Config.get('Tokens', 'CONSUMER_SECRET')


tso = TwitterSearchOrder()
tso.set_language('en')
tso.set_include_entities(False)
ts = TwitterSearch(
    consumer_key=CONSUMER_KEY,
    consumer_secret=CONSUMER_SECRET,
    access_token=ACCESS_TOKEN,
    access_token_secret=ACCESS_SECRET,
    tweet_mode='extended')

keyword_list = Config.get('Keywords', 'keyword_list')
keywords_filter = Config.get('Keywords', 'keywords_filter')
keyword_combo = Config.get('Keywords', 'keyword_combo')
stop = Config.get('Stopwords', 'stop')
filter_users = Config.get('Usernames', 'filter_users')
Example #31
def getTweets(politician_id, searchOnlySexistWords):
    try:

        politician = Politician.objects.get(id=politician_id)
        politician_names = [
            politician.first_name + " " + politician.last_name,
            politician.username
        ]

        tso = TwitterSearchOrder()
        searchTerms = []

        if searchOnlySexistWords:
            sexistWords = CONFIG['SEXISTWORDS']
            for word in sexistWords:
                for politician_name in politician_names:
                    searchTerms.append(word + ' ' + politician_name)
        elif searchOnlySexistWords is False:
            searchTerms = politician_names

        tso.set_keywords(searchTerms, or_operator=True)
        tso.set_language("en")
        tso.set_include_entities(False)
        querystr = tso.create_search_url()
        tso.set_search_url(querystr + "&tweet_mode=extended")
        ts = TwitterSearch(consumer_key=CONFIG['CONSUMER_KEY'],
                           consumer_secret=CONFIG['CONSUMER_SECRET'],
                           access_token=CONFIG['ACCESS_TOKEN'],
                           access_token_secret=CONFIG['ACCESS_TOKEN_SECRET'])

        print("**Processing tweets for " +
              str(politician.first_name + " " + politician.last_name) + "**")
        if searchOnlySexistWords:
            tweets = ts.search_tweets_iterable(tso)
            return tweets
        else:
            # will limit to 100 if not only searching sexist words
            tweets = ts.search_tweets(tso)
            return tweets['content']['statuses']

    except TwitterSearchException as e:
        logging.exception("Unable to get new tweets because of" + str(e))
Example #32
def Tweets():

    try:

        max_feeds = 10
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.set_language('en')
        tso.set_include_entities(
            False)  # and don't give us all those entity information
        tso.set_until(new_date)
        tso.arguments.update({'tweet_mode': 'extended'})
        tso.arguments.update({'truncated': 'False'})

        ts = TwitterSearch(consumer_key='',
                           consumer_secret='',
                           access_token='',
                           access_token_secret='',
                           proxy='http://proxy_address')

        for c in range(len(MainDF)):
            count = 0

            #kw=[MainDF['twitter'][c]]
            #for h in MainDF['hashtag'][c]:
            #    kw.append(h)

            tso.set_keywords(MainDF['hashtag'][c])
            tweets_list = []

            tuo = TwitterUserOrder(MainDF['twitter'][c])
            #            tuo.set_language('en')
            tuo.set_include_entities(
                False)  # and don't give us all those entity information
            #            tuo.set_until(days_ago)
            #            tuo.set_count(15)
            tuo.arguments.update({'tweet_mode': 'extended'})
            tuo.arguments.update({'truncated': 'False'})

            #for tweet in ts.search_tweets_iterable(tso):
            #    print(tweet)
            #    tweets_list.append([tweet['user']['screen_name'],tweet['full_text']])

            for tweet in ts.search_tweets_iterable(tso):
                if 'retweeted_status' in tweet:
                    pass  # skip retweets
                    #tweets_list.append([tweet['user']['screen_name'],tweet['retweeted_status']['full_text'],'Retweet of ' + tweet['retweeted_status']['user']['screen_name']])
                else:
                    links = Find(tweet['full_text'])
                    links = ', '.join(link for link in links)
                    #print(tweet)
                    tweets_list.append([
                        MainDF['company'][c], tweet['user']['screen_name'],
                        tweet['full_text'], tweet['created_at'], links
                    ])

            for tweet in ts.search_tweets_iterable(tuo):
                if tweet['lang'] != 'en':
                    #print(tweet)
                    pass  # skip non-English tweets
                else:

                    # print(tweet)
                    links = Find(tweet['full_text'])
                    links = ', '.join(link for link in links)

                    tweets_list.append([
                        MainDF['company'][c], tweet['user']['screen_name'],
                        tweet['full_text'], tweet['created_at'], links
                    ])
                    count = count + 1

                    if count == max_feeds:
                        break

            if tweets_list != []:
                tweets_datasets[MainDF['company'][c]] = pd.DataFrame(
                    tweets_list)
                tweets_datasets[MainDF['company'][c]].columns = [
                    'Company', 'Source/User', 'Title/Tweet', 'Date', 'Link'
                ]
                tweets_datasets[MainDF['company'][c]].insert(
                    0, 'Category', 'Twitter')

                for i in range(
                        len(tweets_datasets[MainDF['company'][c]]['Date'])):

                    tweets_datasets[MainDF['company'][c]]['Date'][i] = parse(
                        tweets_datasets[MainDF['company'][c]]['Date'][i])
                    tweets_datasets[
                        MainDF['company'][c]]['Date'][i] = tweets_datasets[
                            MainDF['company'][c]]['Date'][i].date()

                    #print(datasets[companies_names[count]])

                tw_current_companies.append(MainDF['company'][c])

            else:
                pass  # no tweets collected for this company

            #tweets_list.append()
            #print( '@%s tweeted: %s' % ( tweet['user']['screen_name'], tweet['text'] ) )

    except TwitterSearchException as e:  # take care of all those ugly errors if there are some
        print(e)
Example #33
from TwitterSearch import TwitterSearchOrder, TwitterSearch, TwitterSearchException
from CREDS import *

ts = TwitterSearch(
                  consumer_key = TWITTER_CONSUMER_KEY,
                  consumer_secret = TWITTER_CONSUMER_SECRET, 
                  access_token = TWITTER_ACCESS_TOKEN,
                  access_token_secret = TWITTER_ACCESS_TOKEN_SECRET,
                  )

try:
    tso = TwitterSearchOrder()
    tso.set_keywords(['surveymonkey','docs.google.com/forms'], or_operator=True)

    for tweet in ts.search_tweets_iterable(tso):
        print('@%s tweeted: %s' % (tweet['user']['screen_name'], tweet['text']))

except TwitterSearchException as e:
    print(e)
Example #34
from TwitterSearch import *
import time
try:
    tso = TwitterSearchOrder()
    html = open("/usr/python/progect/profileSamples.txt",'r+').read()
    tso.set_keywords(['foo', 'bar'])
    ts = TwitterSearch(
        consumer_key='<CONSUMER_KEY>',  # credentials redacted
        consumer_secret='<CONSUMER_SECRET>',
        access_token='<ACCESS_TOKEN>',
        access_token_secret='<ACCESS_TOKEN_SECRET>'
    )
    def my_callback_closure(current_ts_instance): # accepts ONE argument: an instance of TwitterSearch
        queries, tweets_seen = current_ts_instance.get_statistics()
        if queries > 0 and (queries % 5) == 0: # trigger delay every 5th query
            time.sleep(60) # sleep for 60 seconds
    for tweet in ts.search_tweets_iterable(tso, callback=my_callback_closure):
        print( '@%s tweeted: %s' % ( tweet['user']['screen_name'], tweet['text'] ) )
except TwitterSearchException as e:
    print(e)


#----------get feature twitter-------------------------------

# twitters = []
# for t in pfeature:
#     twitters.extend(gettwitter(t))
#     time.sleep(random.randint(3, 6))
#     print(t)

# f = open('/usr/python/progect/'+num+'twitter.txt','w')
Example #35
 def _prepare_request(self):
     tso = TwitterSearchOrder()
     tso.setKeywords(self._get_keywords())
     tso.setLanguage(self.language)
     tso.setIncludeEntities(False)
     return tso
Example #36
File: twitter.py Project: tjcsl/cslbot
def cmd(send, msg, args):
    """
    Search the Twitter API.
    Syntax: {command} <query> <--user username> <--count 1>
    """
    if not msg:
        send('What do you think I am, a bird?')
        return

    parser = arguments.ArgParser(args['config'])
    parser.add_argument('query', nargs='*')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--user', dest='user', default=None)
    group.add_argument('--count', dest='count', type=int, default=1)
    group.add_argument('--random', action='store_true', default=False)

    try:
        cmdargs = parser.parse_args(msg)
    except arguments.ArgumentException as e:
        send(str(e))
        return

    api = get_search_api(args['config'])

    query = TwitterSearchOrder()
    keywords = [' '.join(cmdargs.query)]
    if cmdargs.user:
        keywords += ['from:{}'.format(cmdargs.user)]
    query.set_keywords(keywords)
    query.set_language('en')
    query.set_result_type('recent')
    query.set_include_entities(False)
    query.set_count(cmdargs.count)

    results = list(api.search_tweets_iterable(query))
    if not results:
        send('No tweets here!')
        return

    if cmdargs.random:
        shuffle(results)

    max_chan_tweets = 5
    max_pm_tweets = 25
    if cmdargs.count > max_pm_tweets:
        send("That's too many tweets! The maximum allowed through PM is {}".format(max_pm_tweets))
        return

    if cmdargs.count > max_chan_tweets:
        send("That's a lot of tweets! The maximum allowed in a channel is {}".format(max_chan_tweets))

    for i in range(0, min(cmdargs.count, max_pm_tweets)):
        if cmdargs.count <= max_chan_tweets:
            send(tweet_text(results[i]))
        else:
            send(tweet_text(results[i]), target=args['nick'])
Example #37
    def poll(self):
        tso = TwitterSearchOrder()
        tso.set_keywords(self._config['query'], True)

        language = self._config.get('language', None)
        if language:
            tso.set_language(language)

        tso.set_result_type('recent')
        tso.set_count(self._config.get('count', 30))
        tso.set_include_entities(False)

        last_id = self._get_last_id()

        if last_id:
            tso.set_since_id(int(last_id))

        try:
            tweets = self._client.search_tweets(tso)
            tweets = tweets['content']['statuses']
        except Exception as e:
            self._logger.exception('Polling Twitter failed: %s' % (str(e)))
            return

        tweets = list(reversed(tweets))

        if tweets:
            self._set_last_id(last_id=tweets[-1]['id'])

        for tweet in tweets:
            self._dispatch_trigger_for_tweet(tweet=tweet)
Example #38
from TwitterSearch import TwitterSearchOrder, TwitterSearch, TwitterSearchException
from CREDS import *

ts = TwitterSearch(
    consumer_key=TWITTER_CONSUMER_KEY,
    consumer_secret=TWITTER_CONSUMER_SECRET,
    access_token=TWITTER_ACCESS_TOKEN,
    access_token_secret=TWITTER_ACCESS_TOKEN_SECRET,
)

try:
    tso = TwitterSearchOrder()
    tso.set_keywords(['surveymonkey', 'docs.google.com/forms'],
                     or_operator=True)

    for tweet in ts.search_tweets_iterable(tso):
        print('@%s tweeted: %s' %
              (tweet['user']['screen_name'], tweet['text']))

except TwitterSearchException as e:
    print(e)
Example #39
def twitter_search(params, start_time):
    """
    Retrieves most recent tweets since yesterday based on keywords.
    Retrieves as many tweets as api gives, up to the maximum set by max_tweets.
    :param params: The keywords to search for, formatted as list of 
    strings. To search for a url, use this syntax:
        "url:\"gizmodo com\""
    in which the domain is separated by spaces instead of dots and the 
    internal quotes are escaped with backslashes.
    :return: Returns list of dicts containing:
      - tweets: the number of tweets, since yesterday, about the specified
      keywords (up to a maximum count of max_tweets)
      - tweets_followers: the number of (unique) followers of those tweets
      (i.e., if the same person tweets ten times in one day, that person's
      followers are counted once, not ten times).
      - most_followed_name: the name of the tweeter who tweeted in 'tweets'
      (above) who has the most followers
      - most_followed_count: the count of the number of followers who follow
      the tweeter with the most followers
    """
    print('starting twitter_search')
    # Set up flow control variables.
    max_tweets = 10000  # maximum number of tweets to retrieve from api
    more_tweets = True  # are there more tweets to retrieve?
    need_to_sleep = False  # tells to sleep (if approaching api rate limit)

    error = 'ok'

    try:
        # create TwitterSearch object using this app's tokens.
        ts = TwitterSearch(
            consumer_key=tw.CONSUMER_KEY,
            consumer_secret=tw.CONSUMER_SECRET,
            access_token=tw.ACCESS_TOKEN,
            access_token_secret=tw.ACCESS_TOKEN_SECRET
        )

        # Create a TwitterSearchOrder object and add keywords to it.
        tso = TwitterSearchOrder()
        for param in params:
            tso.add_keyword(param)
        # Only search for tweets since yesterday (in UTC).
        yesterday = datetime.datetime.utcnow().date() - datetime.timedelta(1)
        tso.set_since(yesterday)

        # Set up counter variables.
        tweets = 0  # count of tweets about keywords, since yesterday
        unique_tweeters = {}  # dict of unique tweeters about keywords
        tweets_followers = 0  # count of followers of unique_tweeters
        min_id = 0  # next tweet for paginated results, when multiple api calls
        max_followers = (0, 'null')  # the tweeter with the most followers

        # Keep calling the api (for paginated results) until there are no
        # more tweets to retrieve, or until max_tweets limit has been reached.
        while more_tweets and tweets < max_tweets:
            # Sleep for 60 seconds, if needed, to avoid hitting api limit.
            if need_to_sleep:
                print("rate limit:", rate_limit)
                time.sleep(60)
            # Call the search api.
            response = ts.search_tweets(tso)
            # Are there no more tweets to retrieve?
            if len(response["content"]["statuses"]) == 0:
                more_tweets = False
            else:  # there are more tweets to retrieve
                # Iterate through the batch of tweets retrieved from this
                # api call. Count the tweet and track all the unique tweeters.
                for tweet in response["content"]["statuses"]:
                    if tweets > max_tweets:
                        break  # stop counting/tracking if reached max_tweets
                    tweets += 1
                    if (min_id == 0) or (tweet["id"] < min_id):
                        # Set min_id to the id of this tweet. The api returns
                        # tweets in reverse chronological order (most recent is
                        # first), so min_id is a lowering "ceiling" of which
                        # tweet id to start from during subsequent api call.
                        min_id = tweet["id"]
                    # Can uncomment the following lines to see who is tweeting.
                    # print(str(tweets) + "\t" + str(tweet["id"])
                    #       + "\t" + tweet["user"]["screen_name"]
                    #       + "\t" + str(tweet["user"]["followers_count"]))
                    if tweet["user"]["screen_name"] not in unique_tweeters:
                        tweeter = tweet["user"]["screen_name"]
                        tweeters_followers = tweet["user"]["followers_count"]
                        # Add tweet's screen_name and followers_count to
                        # unique_tweeters, iff this is first time seeing
                        # this screen_name.
                        unique_tweeters[tweeter] = tweeters_followers
                # Set the next paginated result's start point (subtract one
                # to avoid retrieving the last tweet from this batch twice).
                tso.set_max_id(min_id - 1)
            # If less than 15 api calls remaining then sleep during next loop.
            # (Search api free tier allows 180 calls per 15 minute period.)
            rate_limit = int(ts.get_metadata()["x-rate-limit-remaining"])
            if rate_limit < 15:
                need_to_sleep = True
            else:
                need_to_sleep = False
        # After all tweets have been retrieved (up to max_tweets), calculate
        # metrics on the followers of the tweeters in unique_tweeters.
        for tweeter in unique_tweeters:
            # Count how many followers there are in all the unique_tweeters.
            tweets_followers += unique_tweeters[tweeter]
            # Determine which tweeter from unique_tweeters has most followers.
            if unique_tweeters[tweeter] > max_followers[0]:
                max_followers = (unique_tweeters[tweeter], tweeter)

    except TwitterSearchException as e:
        tweets = None
        tweets_followers = None
        error = format_exception(ValueError, e, e.__traceback__)

    tweets = make_dict(
        value=tweets,
        data_name='tweets',
        start_time=start_time,
        status=error
    )

    tweets_followers = make_dict(
        value=tweets_followers,
        data_name='tweets_followers',
        start_time=start_time,
        status=error
    )

    most_followed_name = make_dict(
        value=escape(max_followers[1], True),
        data_name='most_followed_name',
        start_time=start_time,
        status=error
    )

    most_followed_count = make_dict(
        value=max_followers[0],
        data_name='most_followed_count',
        start_time=start_time,
        status=error
    )

    return [tweets, tweets_followers, most_followed_name, most_followed_count]
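The function above combines two reusable patterns: paginating backwards with
set_max_id() and backing off when the remaining rate limit runs low. A minimal
sketch of just that loop, assuming an authenticated TwitterSearch client ts
and a prepared TwitterSearchOrder tso (the names and the max_calls cap are
illustrative, not from the original code):

import time

def iter_all_tweets(ts, tso, max_calls=170):
    """Yield every status for the query, walking backwards via max_id."""
    for _ in range(max_calls):
        response = ts.search_tweets(tso)
        statuses = response["content"]["statuses"]
        if not statuses:
            return  # no older tweets left for this query
        for status in statuses:
            yield status
        # Continue strictly below the oldest tweet id in this batch.
        tso.set_max_id(min(s["id"] for s in statuses) - 1)
        # Back off when close to the free tier's 180 calls per 15 minutes.
        if int(ts.get_metadata()["x-rate-limit-remaining"]) < 15:
            time.sleep(60)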
예제 #40
0
    def poll(self):
        tso = TwitterSearchOrder()
        tso.set_keywords(self._config['query'])

        language = self._config.get('language', None)
        if language:
            tso.set_language(language)

        tso.set_result_type('recent')
        tso.set_count(self._config.get('count', 30))
        tso.set_include_entities(False)

        last_id = self._get_last_id()

        if last_id:
            tso.set_since_id(int(last_id))

        try:
            tweets = self._client.search_tweets(tso)
            tweets = tweets['content']['statuses']
        except Exception as e:
            self._logger.exception('Polling Twitter failed: %s' % (str(e)))
            return

        tweets = list(reversed(tweets))

        if tweets:
            self._set_last_id(last_id=tweets[-1]['id'])

        for tweet in tweets:
            self._dispatch_trigger_for_tweet(tweet=tweet)
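Note that poll() is incremental only because _get_last_id() and
_set_last_id() persist the newest tweet id between polls; those helpers are
not shown above. A minimal file-backed stand-in (hypothetical, assuming a
self._state_path attribute on the sensor) could look like:

    def _get_last_id(self):
        # Return the persisted id, or None on the very first poll.
        try:
            with open(self._state_path) as f:
                return f.read().strip() or None
        except FileNotFoundError:
            return None

    def _set_last_id(self, last_id):
        # Persist the id of the newest tweet we have dispatched.
        with open(self._state_path, 'w') as f:
            f.write(str(last_id))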
예제 #41
0
# Establish connection to CouchDB and select the database to write into.
# The database must already exist; create it manually in the CouchDB control
# panel first.
database = couchdb.Server()[COUCH_DATABASE_NAME]

# Setup a twitter connection and configure its credentials:
twitter_connection = TwitterSearch(**TWITTER_CREDENTIALS)

# The twitter client may stop iterating the tweets at some point.
# In order to automatically continue at the last position, we put the
# import in a "while"-loop which will be stopped when there are no new
# tweets to import.
while True:
    # First, let's build a search query:
    twitter_query = TwitterSearchOrder()
    twitter_query.set_keywords(TWITTER_SEARCH_KEYWORDS)
    # Only import English tweets, as our sentiment analysis only works
    # with the English language for now.
    twitter_query.set_language('en')
    # We do not require entities (e.g. extracted URLs) as we are only
    # interested in the raw text of the tweet.
    twitter_query.set_include_entities(False)

    document_ids = tuple(filter(lambda id_: not id_.startswith('_'), database))
    if len(document_ids) > 0:
        # If we already have imported tweets, we should continue with the oldest
        # tweet we know and work our way to older tweets from there.
        # We do that by setting the max_id query parameter to the oldest tweet
        # we know.
        oldest_id = min(document_ids)
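        # The listing is truncated here in the source; per the comment
        # above, the continuation presumably resumes just below the
        # oldest known tweet (hypothetical reconstruction):
        twitter_query.set_max_id(int(oldest_id) - 1)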
예제 #42
0
    def poll(self):
        tso = TwitterSearchOrder()
        tso.set_keywords(self._config["query"])

        language = self._config.get("language", None)
        if language:
            tso.set_language(language)

        tso.set_result_type("recent")
        tso.set_count(self._config.get("count", 30))
        tso.set_include_entities(False)

        last_id = self._get_last_id()

        if last_id:
            tso.set_since_id(int(last_id))

        try:
            tweets = self._client.search_tweets(tso)
            tweets = tweets["content"]["statuses"]
        except Exception as e:
            self._logger.exception("Polling Twitter failed: %s" % (str(e)))
            return

        tweets = list(reversed(tweets))

        if tweets:
            self._set_last_id(last_id=tweets[-1]["id"])

        for tweet in tweets:
            self._dispatch_trigger_for_tweet(tweet=tweet)
예제 #43
0
# The following block ensures that the query does not do duplicate work.
# First, it counts how many documents exist in the collection.
db_count = tweets.count()

# If there are documents in the collection, the collection is queried, tweet objects are sorted by date, and the tweet_id of the most recent tweet is retrieved and later set as the "since_id"
if db_count != 0:
    # Sort descending by startedAtTime so the first document is the most
    # recent tweet, then grab its tweet_id and set it as the "since_id".
    latest_id = tweets.find({}, {'object.tweet_id': 1}).sort('startedAtTime', -1).limit(1)
    latest_id_str = latest_id[0]['object']['tweet_id']
    latest_id_int = int(latest_id_str)
    print('Count of documents in the ' + keyword + ' collection is not 0. It is ' + str(db_count) + '. Mongo is now identifying the latest tweet ID to append as a parameter to the API call.')
# If there are no documents in the collection, no query is run and the
# since_id is left out of the API call.
else:
    print('The Mongo collection ' + keyword + ' is empty. The script will now collect all tweets.')
    
# create a TwitterSearchOrder object
tso = TwitterSearchOrder() 

# let's define all words we would like to have a look for
tso.set_keywords([keyword])

# Select language
tso.set_language('en') 

# Include Entity information
tso.set_include_entities(True)

if db_count != 0:
    tso.set_since_id(latest_id_int)
    print('Since the document count in the ' + keyword + ' collection is above 0, the since_id uses the parameter of the latest tweet so that only new tweets are collected.')
else:
    print('No documents exist in the ' + keyword + ' collection right now, so the since_id parameter will be empty and all tweets will be collected.')
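The fragment stops before the actual collection and insert step. Under the
same assumptions (a pymongo collection named tweets, a TwitterSearch client
ts configured elsewhere, and the document schema implied by the query
above), the import loop might plausibly continue like this (a sketch, not
from the original script):

for tweet in ts.search_tweets_iterable(tso):
    tweets.insert_one({
        'object': {'tweet_id': tweet['id_str']},
        'startedAtTime': tweet['created_at'],
        'text': tweet['text'],
    })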
예제 #44
0
def cmd(send, msg, args):
    """
    Search the Twitter API.
    Syntax: {command} <query> <--user username> <--count 1>
    """
    if not msg:
        send('What do you think I am, a bird?')
        return

    parser = arguments.ArgParser(args['config'])
    parser.add_argument('query', nargs='*')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--user', dest='user', default=None)
    group.add_argument('--count', dest='count', type=int, default=1)
    group.add_argument('--random', action='store_true', default=False)

    try:
        cmdargs = parser.parse_args(msg)
    except arguments.ArgumentException as e:
        send(str(e))
        return

    api = get_search_api(args['config'])

    query = TwitterSearchOrder()
    keywords = [' '.join(cmdargs.query)]
    if cmdargs.user:
        keywords += [f'from:{cmdargs.user}']
    query.set_keywords(keywords)
    query.set_language('en')
    query.set_result_type('recent')
    query.set_include_entities(False)
    query.set_count(cmdargs.count)

    results = list(api.search_tweets_iterable(query))
    if not results:
        send('No tweets here!')
        return

    if cmdargs.random:
        shuffle(results)

    max_chan_tweets = 5
    max_pm_tweets = 25
    if cmdargs.count > max_pm_tweets:
        send(
            f"That's too many tweets! The maximum allowed through PM is {max_pm_tweets}"
        )
        return

    if cmdargs.count > max_chan_tweets:
        send(
            f"That's a lot of tweets! The maximum allowed in a channel is {max_chan_tweets}"
        )

    for i in range(min(cmdargs.count, max_pm_tweets, len(results))):
        if cmdargs.count <= max_chan_tweets:
            send(tweet_text(results[i]))
        else:
            send(tweet_text(results[i]), target=args['nick'])
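The helper tweet_text() used above is not shown in this example. A minimal
stand-in (assumed, not from the original bot) that renders a status from the
search api for IRC output could be:

def tweet_text(tweet):
    # Render a status dict as "@user: text".
    return '@%s: %s' % (tweet['user']['screen_name'], tweet['text'])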