Exemple #1
0
def start_stream(auth, l):
    """Run the sample stream forever, rebuilding it after every failure.

    Args:
        auth: Authentication handler passed straight to ``Stream``.
        l: Stream listener passed straight to ``Stream``.

    A fresh ``Stream`` is created for each attempt; the loop also starts a
    new attempt when ``sample()`` returns normally. Only ``Exception``
    subclasses are swallowed, so ``KeyboardInterrupt`` and ``SystemExit``
    propagate and the loop can actually be stopped.
    """
    while True:
        try:
            stream = Stream(auth, l)
            stream.sample()
        except Exception:
            # Best-effort reconnect: discard the failed stream and retry.
            continue
Exemple #2
0
 def _get_tweets(self):
     """Run the sample stream, English only, using credentials from ``self.conf``.

     The stream delivers to a ``QueueListener`` constructed with
     ``self.queue``.
     """
     conf = self.conf
     handler = OAuthHandler(conf['oauth.consumer_key'],
                            conf['oauth.consumer_secret'])
     handler.set_access_token(conf['oauth.access_token'],
                              conf['oauth.access_token_secret'])
     queue_listener = QueueListener(self.queue)
     Stream(handler, queue_listener).sample(languages=['en'])
Exemple #3
0
def start_stream(auth, l):
    """Run the sample stream forever, rebuilding it after every failure.

    Args:
        auth: Authentication handler passed straight to ``Stream``.
        l: Stream listener passed straight to ``Stream``.

    A fresh ``Stream`` is created for each attempt; the loop also starts a
    new attempt when ``sample()`` returns normally. Only ``Exception``
    subclasses are swallowed, so ``KeyboardInterrupt`` and ``SystemExit``
    propagate and the loop can actually be stopped.
    """
    while True:
        try:
            stream = Stream(auth, l)
            stream.sample()
        except Exception:
            # Best-effort reconnect: discard the failed stream and retry.
            continue
Exemple #4
0
def main():
    """Collect Japanese tweets through a ``QueueListener`` until interrupted.

    Builds the listener and a ``Stream`` from ``listener.auth``, issues a
    ``filter`` request for Japanese with a list of very common tokens, then
    loops on ``stream.sample()``. Socket/HTTP errors are reported and followed
    by a sleep of ``min(tcpip_delay, MAX_TCPIP_TIMEOUT)`` seconds, after which
    the module-level ``tcpip_delay`` grows by 0.25. On exit the stream is
    disconnected and ``listener.dumpfile`` is printed.

    NOTE(review): if ``stream.filter`` blocks, the sample loop below is only
    reached after the filter call returns -- confirm the intended order.
    """
    # open stream
    listener = QueueListener()
    stream = Stream(listener.auth, listener, language='ja')

    # [stream filter]
    # The track terms are frequent Japanese words and punctuation marks.
    stream.filter(languages=["ja"],
                  track=[
                      '私', 'あなた', '俺', 'ー', 'する', 'です', 'ます', 'けど', '何', '@',
                      '#', '#', '。', ',', '!', '?', '…', '.', '!', '?', ',',
                      ':', ':', '』', ')', ')', '...'
                  ])
    #stream.filter(languages=["ja"], track=['私','あなた','俺','ー','する','です','ます','けど','何','I', 'you', 'http', 'www', 'co', '@', '#', '#', '。', ',', '!','?','…', '.', '!','?', ',', ':', ':', '』', ')', ')', '...'])

    # Default Script# stream.filter(locations=[-122.75,36.8,-121.75,37.8])  # San Francisco
    # stream.filter(locations=[-74,40,-73,41])  # New York City
    # stream.filter(languages=["en"], track=['python', 'obama', 'trump'])

    # The outer try/finally guarantees the disconnect and the final message
    # on every exit path, including the return taken on Ctrl-C.
    try:
        while True:
            try:
                stream.sample()  # blocking!
            except KeyboardInterrupt:
                print('KEYBOARD INTERRUPT')
                return
            except (socket.error, http.client.HTTPException):
                # The delay is module-level state, so it keeps growing
                # across successive failures; the sleep itself is capped.
                global tcpip_delay
                print('TCP/IP Error: Restarting after %.2f seconds.' %
                      tcpip_delay)
                time.sleep(min(tcpip_delay, MAX_TCPIP_TIMEOUT))
                tcpip_delay += 0.25
    finally:
        stream.disconnect()
        print('Exit successful, corpus dumped in %s' % (listener.dumpfile))
Exemple #5
0
    def get_streaming_data(self):
        """Sample tweets until enough were grabbed, then store the statistics.

        A new ``Stream`` is created for every pass of the loop. Any exception
        from ``sample()`` is reported and followed by a 3 second pause. After
        each pass the grabbed count is re-read from ``self.s``. Once the
        target ``self.num_tweets_to_grab`` is reached, the statistics from
        ``self.s.get_stats()`` are printed, inserted into ``lang_data``,
        ``love_data``, ``twit_data`` and ``country_data``, and committed.
        """
        grabbed = 0
        while grabbed < self.num_tweets_to_grab:
            stream_listener = listener(self.s, self.twit_utils,
                                       self.num_tweets_to_grab,
                                       self.retweet_count)
            twitterStream = Stream(self.auth, stream_listener)
            try:
                twitterStream.sample()
            except Exception as err:
                print("Error. Restarting Stream.... Error: ")
                print(err.__doc__)
                print("Le Error! Restart")
                time.sleep(3)  # pause 3 seconds before the next attempt
            finally:
                grabbed = self.s.get_tweets_grabbed()
                print("tweets_grabbed = ", grabbed)

        stats = self.s.get_stats()
        lang, top_lang, love_words, swear_words, top_tweets, countries = stats

        print(Counter(lang))
        print(Counter(top_lang))
        print("Love Words {} Swear Words {}".format(love_words, swear_words))
        print(Counter(countries))

        def counted(values):
            # Text form of the (item, count) pairs stored in the database.
            return str(list(Counter(values).items()))

        cursor = self.c
        cursor.execute("INSERT INTO lang_data VALUES (?,?, DATETIME('now'))",
                       (counted(lang), counted(top_lang)))
        cursor.execute("INSERT INTO love_data VALUES (?,?, DATETIME('now'))",
                       (love_words, swear_words))
        for tweet_html in top_tweets:
            cursor.execute("INSERT INTO twit_data VALUES (?, DATETIME('now'))",
                           (tweet_html,))
        cursor.execute("INSERT INTO country_data VALUES (?, DATETIME('now'))",
                       (counted(countries),))

        self.conn.commit()
Exemple #6
0
def main():
    """Stream sampled tweets into Postgres, reconnecting after known errors.

    Each pass opens a connection with ``utils.connect('ng')``, makes sure the
    ``TABLENAME`` table exists, and runs ``Stream.sample`` with a
    ``tweet.PostgresStreamer`` as listener. ``KeyboardInterrupt`` ends the
    loop; index/connection/protocol/read-timeout errors cause a 90 second
    pause followed by a fresh attempt.
    """
    while True:
        # Reset per attempt so the handlers never touch an unbound name when
        # the failure happens before the Stream has been created.
        stream = None
        try:
            with utils.connect('ng') as conn:
                with conn.cursor() as cur:
                    cur.execute("""CREATE TABLE IF NOT EXISTS {tablename}(
                        id_str text PRIMARY KEY,
                        source text,
                        user_id text,
                        created_at timestamp,
                        text text)""".format(tablename=TABLENAME))
                data_streamer = tweet.PostgresStreamer(conn=conn,
                                                       tablename=TABLENAME)
                auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
                auth.set_access_token(ACCESS_TOKEN_KEY, ACCESS_TOKEN_SECRET)
                stream = Stream(auth, data_streamer)
                stream.sample(languages=LANGUAGES)
                conn.commit()
        except KeyboardInterrupt:
            if stream is not None:
                stream.disconnect()
            break
        except (IndexError, ConnectionError, ProtocolError, ReadTimeoutError):
            if stream is not None:
                stream.disconnect()
            time.sleep(90)
            continue
def main():
    """Connect to the sample stream and write tweets via a worker thread.

    A writer thread running ``worker(listener)`` is started, then
    ``stream.sample()`` is retried in a loop. Socket/HTTP errors trigger a
    sleep of ``min(tcpip_delay, MAX_TCPIP_TIMEOUT)`` seconds and increase the
    module-level ``tcpip_delay`` by 0.25. Ctrl-C ends the loop.

    Shutdown (disconnect, poison pill, joins) runs exactly once, when the
    loop is left. Previously it ran after every pass of the loop, which
    stopped the writer thread while streaming continued.
    """
    global tcpip_delay

    listener = QueueListener()
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)

    writer_thread = threading.Thread(target=worker, args=(listener, ))
    writer_thread.start()

    stream = Stream(auth, listener)

    print_status(listener)

    try:
        while True:
            try:
                stream.sample()  # blocking!
            except KeyboardInterrupt:
                print('KEYBOARD INTERRUPT:')
                return
            except (socket.error, httplib.HTTPException):
                print('TCP/IP Error: Restarting after '
                      '{} seconds.'.format(tcpip_delay))
                time.sleep(min(tcpip_delay, MAX_TCPIP_TIMEOUT))
                tcpip_delay += 0.25
    finally:
        print('Disconnecting stream')
        stream.disconnect()
        print('Waiting for last tweets to finish processing')
        # Send poison pill to writer thread and wait for it to exit
        listener.queue.put(None)
        listener.queue.join()
        print('Waiting for writer thread to finish')
        writer_thread.join()
        print('Exit successful')
def main():
    """Listen to the sample stream and store conversations in a dated database.

    The database path is built from the home directory, the literal
    ``{path}`` placeholder and today's date. Unless ``--new`` is given a value
    other than -1, ``make_db`` is called for that path first. Protocol and
    read-timeout errors are printed and streaming resumes at once;
    socket/HTTP errors sleep for a delay that grows by 0.25 seconds each
    time. The stream is disconnected on every exit path.
    """
    # Adjust the {path} placeholder to match your own environment.
    date_stamp = date.today().strftime("20%y.%m.%d")
    db_path = "".join(
        [str(Path.home()), "{path}/Conversation.", date_stamp, ".db"])

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--new',
                            type=int,
                            default=-1,
                            help='-1 indicates new database...')
    options = arg_parser.parse_args()
    if options.new == -1:
        make_db(db_path)

    listener = QueueListener(db_path)
    stream = Stream(listener.auth, listener)
    print("Listening...\n")

    delay = 0.25
    try:
        while True:
            try:
                stream.sample()
            except KeyboardInterrupt:
                print('Stopped')
                break
            except urllib3.exceptions.ProtocolError as err:
                print("Incomplete read", err)
            except urllib3.exceptions.ReadTimeoutError as err:
                print("Read Timeout", err)
            except (socket.error, http.client.HTTPException):
                print("HTTP error waiting for a few seconds")
                time.sleep(delay)
                delay = delay + 0.25
    finally:
        stream.disconnect()
Exemple #9
0
def interactive(username=None, password=None, filenames=None):
    """Ask for any missing credentials, then run the sample stream.

    Args:
        username: Account name; read from stdin when falsy.
        password: Account password; read with ``getpass`` when falsy.
        filenames: Accepted but not used by this function.
    """
    username = username or raw_input('Username: ').strip()
    password = password or getpass("Password: ").strip()
    Stream(username, password, TWSSBuildClassifierListner()).sample()
Exemple #10
0
def start_stream():
    """Run the sample stream in a reconnect loop until Ctrl-C.

    Every pass builds a new ``Streamlistener`` and ``Stream`` (using the
    module-level ``auth``) and calls ``sample()``. Any ``Exception`` is
    printed and the loop reconnects; ``KeyboardInterrupt`` disconnects the
    current stream, if one exists, and leaves the loop.
    """
    while True:
        # Reset per attempt so the Ctrl-C handler never sees an unbound name.
        twitterStream = None
        try:
            listener = Streamlistener()
            twitterStream = Stream(auth, listener)

            # https://dev.twitter.com/streaming/reference/get/statuses/sample
            twitterStream.sample()
        except KeyboardInterrupt:
            # Or however you want to exit this loop
            if twitterStream is not None:
                twitterStream.disconnect()
            break
        except Exception as e:
            # Oh well, reconnect and keep trucking
            print('Generic exception happened. Continuing.. ' + str(e))
Exemple #11
0
    def stream_tweets(self):
        """Authenticate with the keys from ``config`` and run the sample stream.

        Data goes to a ``StdOutListener``; the ``Stream`` is created with
        ``tweet_mode='extended'``.
        """
        out_listener = StdOutListener()
        handler = OAuthHandler(config.api_key, config.api_secret_key)
        handler.set_access_token(config.access_token,
                                 config.access_token_secret)
        Stream(handler, out_listener, tweet_mode='extended').sample()
Exemple #12
0
class streamsample(StreamListener):
    """Owns a ``Stream`` that reports to a ``StdOutListener``."""

    def __init__(self):
        """Authenticate with the module-level keys and build the stream."""
        l = StdOutListener()
        auth = OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(access_token, access_token_secret)
        self.stream = Stream(auth, l)

    def printtest(self):
        """Run the sample stream; data is handled by the listener."""
        self.stream.sample()

    def todatabase(self, numtweets=100):
        """Pass up to ``numtweets`` stream items to ``parsetweet``.

        One item is taken from ``self.stream`` per pass. It is forwarded
        only when it has a ``text`` field and its user's language is
        ``'en'``. The counter advances on every pass, whether or not the
        item was forwarded.

        Args:
            numtweets: Number of passes over the stream.
        """
        # NOTE(review): this connection is opened but never used here;
        # presumably parsetweet does its own database access -- confirm.
        connection = sqlite3.connect(DATABASE_NAME)

        try:
            count = 0
            while count < numtweets:
                for tweet_json in self.stream:
                    # check whether the item is actually a tweet
                    if (tweet_json.get('text')
                            and tweet_json['user']['lang'] == 'en'):
                        parsetweet(tweet_json)
                    break  # take exactly one item per pass
                count += 1
        # NOTE(review): `stream` must resolve to a module-level name that
        # provides ConnectionError -- confirm against the file's imports.
        except stream.ConnectionError:
            print("Disconnected")
        finally:
            connection.close()
class TwitterIngestor:
    """
    Ingests data from Twitter
    """
    def __init__(self, twitter_config, callback):
        """
        Builds the OAuth handler and the Twitter stream
        :param twitter_config: mapping with the keys 'customer_key',
            'customer_secret', 'access_token' and 'access_secret'
        :param callback: passed to the TwitterListener attached to the stream
        """
        self.callback = callback
        handler = OAuthHandler(twitter_config['customer_key'],
                               twitter_config['customer_secret'])
        self.auth = handler
        handler.set_access_token(twitter_config['access_token'],
                                 twitter_config['access_secret'])
        listener = TwitterListener(callback)
        self.twitter_stream = Stream(handler, listener)
        logger.info('Initialized Twitter Ingestor')

    def start(self):
        """
        Starts sampling English tweets asynchronously
        :return:
        """
        logger.info('Starting Twitter feed ingestion')
        sample_options = {'is_async': True, 'languages': ['en']}
        self.twitter_stream.sample(**sample_options)
        logger.info('Started Twitter feed ingestion')

    def stop(self):
        """
        Disconnects the Twitter stream
        :return:
        """
        logger.info('Stopping Twitter feed ingestion')
        self.twitter_stream.disconnect()
        logger.info('Stopped Twitter feed ingestion')
def print_to_terminal():
    """Run the sample stream, English only, delivering to a ``PrintListener``.

    Uses the module-level ``auth``. The stream is sampled once with the
    language restriction. The former second, unfiltered ``sample()`` call
    was dropped because it would have delivered tweets in every language as
    soon as the first call returned.
    """
    listener = PrintListener()
    stream = Stream(auth, listener)
    # Tweets in English language only
    languages = ('en',)
    stream.sample(languages=languages)
def main():
    """Stream sampled tweets into Postgres, reconnecting after known errors.

    Each pass opens a connection with ``utils.connect('ng')``, makes sure the
    ``TABLENAME`` table exists, and runs ``Stream.sample`` with a
    ``tweet.PostgresStreamer`` as listener. ``KeyboardInterrupt`` ends the
    loop; index/connection/protocol/read-timeout errors cause a 90 second
    pause followed by a fresh attempt.
    """
    while True:
        # Reset per attempt so the handlers never touch an unbound name when
        # the failure happens before the Stream has been created.
        stream = None
        try:
            with utils.connect('ng') as conn:
                with conn.cursor() as cur:
                    cur.execute("""CREATE TABLE IF NOT EXISTS {tablename}(
                        id_str text PRIMARY KEY,
                        source text,
                        user_id text,
                        created_at timestamp,
                        text text)""".format(tablename=TABLENAME))
                data_streamer = tweet.PostgresStreamer(conn=conn, tablename=TABLENAME)
                auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
                auth.set_access_token(ACCESS_TOKEN_KEY, ACCESS_TOKEN_SECRET)
                stream = Stream(auth, data_streamer)
                stream.sample(languages=LANGUAGES)
                conn.commit()
        except KeyboardInterrupt:
            if stream is not None:
                stream.disconnect()
            break
        except (IndexError, ConnectionError, ProtocolError, ReadTimeoutError):
            if stream is not None:
                stream.disconnect()
            time.sleep(90)
            continue
Exemple #16
0
def interactive(username=None, password=None, filenames=None):
    """Ask for any missing credentials, then run the sample stream.

    Args:
        username: Account name; read from stdin when falsy.
        password: Account password; read with ``getpass`` when falsy.
        filenames: Accepted but not used by this function.
    """
    username = username or raw_input('Username: ').strip()
    password = password or getpass("Password: ").strip()
    Stream(username, password, TWSSBuildClassifierListner()).sample()
Exemple #17
0
def printStream():
    """Authenticate with the module-level keys and run the sample stream.

    Data is delivered to a ``StdOutListener``.
    """
    out_listener = StdOutListener()
    handler = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
    handler.set_access_token(ACCESS_KEY, ACCESS_SECRET)
    Stream(handler, out_listener).sample()
Exemple #18
0
    def get_streaming_data(self):
        """Run the sample stream once, then store language and tweet stats.

        An exception from ``sample()`` is reported by printing the docstring
        of its class and is otherwise ignored. Afterwards the values from
        ``self.stats.get_stats()`` are printed, inserted into ``lang_data``
        and ``twit_data``, and committed.
        """
        auth = self.auth
        tweet_listener = twitter_listener(
            num_tweets_to_grab=self.num_tweets_to_grab,
            retweet_count=self.retweet_count,
            stats=self.stats,
            get_tweet_html=self.get_tweet_html)
        twitter_stream = Stream(auth, tweet_listener)
        try:
            twitter_stream.sample()
        except Exception as err:
            print(err.__doc__)

        lang, top_lang, top_tweets = self.stats.get_stats()
        for values in (lang, top_lang):
            print(Counter(values))
        print(len(top_tweets))

        lang_row = (str(list(Counter(lang).items())),
                    str(list(Counter(top_lang).items())))
        self.c.execute("INSERT INTO lang_data VALUES (?,?, DATETIME('now'))",
                       lang_row)

        for tweet_html in top_tweets:
            self.c.execute("INSERT INTO twit_data VALUES (?, DATETIME('now'))",
                           (tweet_html, ))

        self.conn.commit()
Exemple #19
0
def run_sample():
    """Authenticate with the module-level keys and run the sample stream.

    Data is delivered to a ``TweetListener``.
    """
    tweet_listener = TweetListener()
    handler = OAuthHandler(consumer_key, consumer_secret)
    handler.set_access_token(access_token, access_token_secret)

    Stream(handler, tweet_listener).sample()
def sample():
    '''Run Twitter's sampling API with a ``TweetStreamListener`` attached.'''
    stream_listener = TweetStreamListener()
    handler = OAuthHandler(consumer_key, consumer_secret)
    handler.set_access_token(access_token, access_secret)

    Stream(handler, stream_listener).sample()
Exemple #21
0
 def get_streaming_data(self):
     """Run the sample stream once with a ``twitter_listener`` attached.

     An exception from ``sample()`` is reported by printing the docstring of
     its class and is otherwise ignored.
     """
     auth = self.auth
     tweet_listener = twitter_listener(
         num_tweets_to_grab=self.num_tweets_to_grab,
         retweet_count=self.retweet_count)
     twitter_stream = Stream(auth, tweet_listener)
     try:
         twitter_stream.sample()
     except Exception as err:
         print(err.__doc__)
Exemple #22
0
def main():
    """Run the sample stream with a ``Listener`` constructed with 100000."""
    tweet_listener = Listener(100000)
    handler = OAuthHandler(consumer_key, consumer_secret)
    handler.set_access_token(access_token, access_token_secret)

    Stream(handler, tweet_listener).sample()
def sample():
    '''Run the sample stream of public statuses with a TweetStreamListener.'''
    stream_listener = TweetStreamListener()
    handler = OAuthHandler(consumer_key, consumer_secret)
    handler.set_access_token(access_token, access_secret)

    Stream(handler, stream_listener).sample()
def save_tweets():
    """Run the sample stream with a ``DatabaseListener`` set to 150 tweets.

    Uses the module-level ``auth`` and passes the single language string
    ``'en,fa'``. The ``tweets.json`` path is computed but not passed on.
    """
    print('hi')
    target_dir = _get_dir_absolute_path()
    filepath = path.join(target_dir, 'tweets.json')  # not used further below

    db_listener = DatabaseListener(number_tweets_to_save=150)
    Stream(auth, db_listener).sample(languages=('en,fa',))
Exemple #25
0
 def get_tweets():
     """Track the comma-separated ``args['words']``, or sample when it is empty.

     Data is delivered to an ``INDXListener``.
     """
     indx_listener = INDXListener()
     handler = OAuthHandler(consumer_key, consumer_secret)
     handler.set_access_token(access_token, access_token_secret)
     stream = Stream(handler, indx_listener)
     if len(args['words']) == 0:
         stream.sample()
     else:
         stream.filter(track=args['words'].split(","))
 def _get_tweets(self):
     """Run the sample stream, English only, using credentials from ``self.conf``.

     The stream delivers to a ``QueueListener`` constructed with
     ``self.queue``.
     """
     conf = self.conf
     handler = OAuthHandler(conf['oauth.consumer_key'],
                            conf['oauth.consumer_secret'])
     handler.set_access_token(conf['oauth.access_token'],
                              conf['oauth.access_token_secret'])
     queue_listener = QueueListener(self.queue)
     Stream(handler, queue_listener).sample(languages=['en'])
class TweetDownloader(safethread.SafeThread, StreamListener):
    """Writes sampled tweets to files that are rotated every ``window`` ms.

    Values handed to ``write`` are buffered as JSON strings. ``action`` moves
    one buffered line per call into ``<destpath>tmp``. Once the window has
    elapsed, it renames that file to ``<destpath>tweets-<begin>.txt`` and
    opens a new ``tmp`` file.

    ``destpath`` is joined to file names by plain string concatenation, so
    it needs to end with a path separator.
    """

    def __init__(self,
                 destpath,
                 consumer_key,
                 consumer_secret,
                 access_token,
                 access_secret,
                 window = 10000,
                 verbose = False):
        """Store the configuration; no connection is made until ``start``.

        Args:
            destpath: Prefix for every file this object creates.
            consumer_key: OAuth consumer key.
            consumer_secret: OAuth consumer secret.
            access_token: OAuth access token.
            access_secret: OAuth access token secret.
            window: Length of one output file in milliseconds.
            verbose: Accepted but not used by this class.
        """
        super(TweetDownloader, self).__init__(name="TweetDownloader")
        self.destpath = destpath
        self.consumer_key = consumer_key
        self.consumer_secret = consumer_secret
        self.access_token = access_token
        self.access_secret = access_secret
        self.prefix = 'tweets'
        self.suffix = 'txt'
        self.window = window
        self.buf = collections.deque()
        self.stopped = True

    def write(self, vals):
        """Queue ``vals`` as a JSON string; the file is written by ``action``."""
        self.buf.appendleft(json.dumps(vals))

    def action(self):
        """Flush one buffered line and rotate the file when the window ends.

        NOTE(review): presumably called repeatedly by ``SafeThread`` --
        confirm against that class.
        """
        if len(self.buf) > 0:
            self.f.write(self.buf.pop() + '\n')

        if ((time.time() * 1000) - self.begin > self.window):
            self.f.close()
            fname = self.destpath + self.prefix + '-' + str(self.begin) + \
                    '.' + self.suffix
            os.rename(self.destpath + 'tmp', fname)

            self.begin = int(time.time() * 1000)
            self.f = open(self.destpath + 'tmp', 'w')

    def start(self):
        """Open the stream and the first file, then start both threads."""
        # Setup the stream
        auth = OAuthHandler(self.consumer_key, self.consumer_secret)
        auth.set_access_token(self.access_token, self.access_secret)
        self.stream = Stream(auth, TweetListener(self))

        # Create the first file
        self.begin = int(time.time() * 1000)
        self.f = open(self.destpath + 'tmp', 'w')

        # Start the threads. `async` is a reserved word since Python 3.7,
        # so the keyword is spelled `is_async`, as elsewhere in this file.
        self.stream.sample(is_async=True)
        super(TweetDownloader, self).start()

    def stop(self):
        """Disconnect the stream and stop the writer thread."""
        self.stream.disconnect()
        super(TweetDownloader, self).stop()
Exemple #28
0
 def get_streaming_data(self):
     """Run the sample stream once with a ``twitter_listener`` attached.

     An exception from ``sample()`` is reported by printing the docstring of
     its class and is otherwise ignored.
     """
     auth = self.auth
     tweet_listener = twitter_listener(
         num_tweets_to_grab=self.num_tweets_to_grab,
         retweet_count=self.retweet_count)
     twitter_stream = Stream(auth, tweet_listener)
     try:
         twitter_stream.sample()
     except Exception as err:
         print(err.__doc__)
Exemple #29
0
def main():
    """Authenticate with the module-level keys and run the sample stream.

    Data is delivered to a ``StdOutListener``.
    """
    out_listener = StdOutListener()
    handler = OAuthHandler(consumer_key, consumer_secret)
    handler.set_access_token(access_token, access_token_secret)

    # Unfiltered sample of the stream; no keyword tracking is applied.
    Stream(handler, out_listener).sample()
    def stream_tweets_sample(self):
        """Sample English tweets until ``self.listener.count`` reaches zero.

        ``ProtocolError`` and ``AttributeError`` from ``sample()`` are
        ignored and the call is repeated while the count is non-zero.
        """
        credentials = self.authenticator.authenticate_app()
        stream = Stream(credentials, self.listener)

        while self.listener.count != 0:
            try:
                stream.sample(languages=['en'], stall_warnings=True)
            except (ProtocolError, AttributeError):
                pass  # fall through to the next check of the loop condition
Exemple #31
0
def twitter_sampling(time, name):
    """Run the sample stream with a ``StdOutListener`` built from the arguments.

    Args:
        time: Passed to the listener as ``time_limit``.
        name: Inserted into the listener's file name,
            ``twitter_data_<name>.txt``.
    """
    output_file = 'twitter_data_' + name + '.txt'
    stream_listener = StdOutListener(time_limit=time, file_name=output_file)
    handler = OAuthHandler(consumer_key, consumer_secret)
    handler.set_access_token(access_token, access_token_secret)

    # Unfiltered sample of the stream; no keyword tracking is applied.
    Stream(handler, stream_listener).sample()
def main():
    """Run the sample stream with the module-level ``auth``.

    Any exception is printed, followed by the line "Connection Error".
    """
    try:
        out_listener = StdOutListener()
        # sample() delivers a random selection of tweets
        Stream(auth, out_listener).sample()
    except Exception as err:
        print(err)
        print("Connection Error")
Exemple #33
0
def save_tweets():
    """Sample English tweets into ``tweets.json`` via a ``DatabaseListener``.

    The listener is created for 200 tweets and the file path inside the
    directory returned by ``_get_dir_absolute_path()``. Ctrl-C ends the run
    quietly. The listener's file is closed on every exit path, not only on
    Ctrl-C, so the handle is not leaked when streaming ends or fails.
    """
    directory = _get_dir_absolute_path()
    filepath = path.join(directory, 'tweets.json')
    listener = DatabaseListener(number_tweets_to_save=200, filepath=filepath)
    stream = Stream(auth, listener)
    languages = ('en', )
    try:
        stream.sample(languages=languages)
    except KeyboardInterrupt:
        pass  # deliberate: Ctrl-C is the normal way to stop collecting
    finally:
        listener.file.close()
Exemple #34
0
    def _start_tweeter_stream(self):
        """
        Starts getting tweets

        Runs the sample stream with a ``StdOutListener`` built around
        ``self._tweets_from_stream`` and prints that queue's size once
        ``sample()`` has returned.
        """
        listener = StdOutListener(self._tweets_from_stream)
        auth = OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(access_token, access_token_secret)
        stream = Stream(auth, listener)

        stream.sample()
        # Function-call form prints the same text on Python 2 and 3.
        print('queue size: {}'.format(self._tweets_from_stream.qsize()))
class MyStreamer:
    """Holds a ``Stream`` bound to a caller-supplied listener."""

    def __init__(self, listener):
        """Authenticate with the keys from ``cred`` and build the stream.

        Args:
            listener: Stream listener; stored and attached to the stream.
        """
        handler = OAuthHandler(cred.CONSUMER_KEY, cred.CONSUMER_SECRET)
        handler.set_access_token(cred.ACCESS_TOKEN, cred.ACCESS_SECRET)
        # The stream takes its credentials from the API object.
        client = API(handler,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)
        self.listener = listener
        self.stream = Stream(auth=client.auth, listener=listener)

    def stream_tweets(self):
        """Start sampling English tweets asynchronously."""
        sample_options = {'languages': ['en'], 'is_async': True}
        self.stream.sample(**sample_options)
Exemple #36
0
    def get_streaming_data(self):
        """Run the sample stream once, then print the collected statistics.

        An exception from ``sample()`` is reported by printing the docstring
        of its class and is otherwise ignored.
        """
        auth = self.auth
        tweet_listener = twitter_listener(
            num_tweets_to_grab=self.num_tweets_to_grab,
            retweet_count=self.retweet_count,
            stats=self.stats,
            get_tweet_html=self.get_tweet_html)
        twitter_stream = Stream(auth, tweet_listener)
        try:
            twitter_stream.sample()
        except Exception as err:
            print(err.__doc__)

        lang, top_lang, top_tweets = self.stats.get_stats()
        for values in (lang, top_lang):
            print(Counter(values))
        print(top_tweets)
Exemple #37
0
    def buffered_stream_tweets(self, num_tweets, subscriber) -> None:
        """Sample English tweets into a ``BufferedTweepyListener``, then join.

        Args:
            num_tweets: Passed to the listener as ``num_tweets``.
            subscriber: Passed to the listener as ``subscriber``.

        The language restriction is applied to the sample request itself.
        The previous separate ``filter(languages=...)`` call carried no
        track/follow/location predicate, and the ``sample()`` that followed
        it was not restricted to English. After sampling returns, every
        thread in ``listener.threads`` is joined.
        """
        listener = BufferedTweepyListener(num_tweets=num_tweets,
                                          subscriber=subscriber)

        stream = Stream(self.auth, listener)
        stream.sample(languages=["en"])

        threads = listener.threads
        for t in threads:
            t.join()
Exemple #38
0
 def start_streaming(self):
     """Sample English tweets forever, with this object as the listener.

     A new ``Stream`` is built for every attempt; any ``Exception`` is
     printed and followed by a new attempt.
     """
     english_only = ['en']
     while True:
         try:
             active_stream = Stream(self.auth_handler, self)
             print('Listening...')
             active_stream.sample(languages=english_only)
         except Exception as err:
             # report the problem; the loop then reconnects
             print(err)
class Twitter():
    """Small wrapper around a ``Stream`` bound to a ``TwitterStreamListener``."""

    def __init__(self, conn):
        """Authenticate with the module-level keys and build the stream.

        Args:
            conn: Passed to ``TwitterStreamListener``.
        """
        self.auth = OAuthHandler(CONSUMER_API_KEY, CONSUMER_API_SECRET)
        self.auth.set_access_token(ACCESS_TOKEN_KEY, ACCESS_TOKEN_SECRET)
        self.stream = Stream(self.auth, TwitterStreamListener(conn))

    def filter(self, array_of_track=None):
        """Run a filter request tracking the given terms.

        Args:
            array_of_track: Terms to track; omitted or ``None`` means an
                empty list. A fresh list is created per call, so no default
                list object is shared between calls.
        """
        track = [] if array_of_track is None else array_of_track
        self.stream.filter(track=track)

    def sample(self):
        """Run the sample stream."""
        self.stream.sample()
Exemple #40
0
def main():
    """Collect tweets for the language given by ``--lang`` until interrupted.

    Parses the required ``--lang`` option, builds a ``QueueListener`` from the
    parsed arguments and a ``Stream`` from ``listener.auth``, and issues a
    ``filter`` request chosen by language: two bounding boxes for ``en``,
    common tokens for ``zh`` and ``ja``. It then loops on ``stream.sample()``.
    Socket/HTTP errors are reported and followed by a sleep of
    ``min(tcpip_delay, MAX_TCPIP_TIMEOUT)`` seconds, after which the
    module-level ``tcpip_delay`` grows by 0.25. On exit the stream is
    disconnected and ``listener.dumpfile`` is printed.

    NOTE(review): if ``stream.filter`` blocks, the sample loop below is only
    reached after the filter call returns -- confirm the intended order.
    """
    # parser
    parser = argparse.ArgumentParser()
    parser.add_argument('--lang',
                        type=str,
                        required=True,
                        help='language: en/zh/ja')
    args = parser.parse_args()

    # open stream
    listener = QueueListener(args)
    stream = Stream(listener.auth, listener)  #, language='zh')

    # [stream filter]
    # Any other --lang value skips the filter request entirely.
    if args.lang == 'en':
        stream.filter(
            locations=[-122.75, 36.8, -121.75, 37.8, -74, 40, -73,
                       41])  # San Francisco or New York City
    elif args.lang == 'zh':
        stream.filter(languages=["zh"],
                      track=[
                          'I', 'you', 'http', 'www', 'co', '@', '#', '。', ',',
                          '!', '.', '!', ',', ':', ':', '』', ')', '...', '我',
                          '你', '他', '哈', '的', '是', '人', '-', '/'
                      ])
    elif args.lang == 'ja':
        stream.filter(languages=["ja"],
                      track=[
                          'I', 'you', 'http', 'www', 'co', '@', '#', '。', ',',
                          '!', '.', '!', ',', ':', ':', '』', ')', '...'
                      ])
    # stream.filter(locations=[-122.75,36.8,-121.75,37.8])  # San Francisco
    # stream.filter(locations=[-74,40,-73,41])  # New York City
    # stream.filter(languages=["en"], track=['python', 'obama', 'trump'])
    #
    # stream.filter(languages=["zh"], locations=[-180,-90,180,90])
    # stream.filter(languages=["ja"], track=['バイト'])

    # The outer try/finally guarantees the disconnect and the final message
    # on every exit path, including the return taken on Ctrl-C.
    try:
        while True:
            try:
                stream.sample()  # blocking!
            except KeyboardInterrupt:
                print('KEYBOARD INTERRUPT')
                return
            except (socket.error, http.client.HTTPException):
                # The delay is module-level state, so it keeps growing
                # across successive failures; the sleep itself is capped.
                global tcpip_delay
                print('TCP/IP Error: Restarting after %.2f seconds.' %
                      tcpip_delay)
                time.sleep(min(tcpip_delay, MAX_TCPIP_TIMEOUT))
                tcpip_delay += 0.25
    finally:
        stream.disconnect()
        print('Exit successful, corpus dumped in %s' % (listener.dumpfile))
Exemple #41
0
 def start_streaming(self):
     """Sample English tweets forever, with this object as the listener.

     A new ``Stream`` is built for every attempt; any ``Exception`` is
     printed and followed by a new attempt.
     """
     english_only = ['en']
     while True:
         try:
             active_stream = Stream(self.auth_handler, self)
             print('Listening...')
             active_stream.sample(languages=english_only)
         except Exception as err:
             # report the problem; the loop then reconnects
             print(err)
Exemple #42
0
def start_tweets_api():
    """Sample English tweets forever with a ``TweetListener`` attached.

    One ``Stream`` is created and ``sample()`` is called in an endless loop.
    Any ``Exception`` is printed and the call is repeated.
    """
    listener = TweetListener()
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)

    stream = Stream(auth, listener)
    while True:
        try:
            stream.sample(languages=['en'])
        except Exception as ex:
            # Function-call form prints the same text on Python 2 and 3.
            print(str(ex))
    def get_random_sample(self, size):
        """
        Collect a sample of tweets from the sample stream.

        :param size: passed to ``SampleStreamListener``
        :return: whatever the listener's ``get_sample()`` returns once
            ``sample()`` has finished
        """
        handler = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
        handler.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
        sample_listener = SampleStreamListener(size)
        Stream(handler, sample_listener).sample()

        return sample_listener.get_sample()
Exemple #44
0
def save_tweets():
    """Sample English tweets into ``tweets.json`` via a ``DatabaseListener``.

    The listener is created for 1000 tweets and the file path inside the
    directory returned by ``_get_dir_absolute_path()``. Ctrl-C ends the run
    quietly. The listener's file is closed on every exit path, not only on
    Ctrl-C, so the handle is not leaked when streaming ends or fails.
    """
    directory = _get_dir_absolute_path()
    filepath = path.join(directory, 'tweets.json')

    listener = DatabaseListener(number_tweets_to_save=1000,
                                filepath=filepath)
    stream = Stream(auth, listener)
    languages = ('en',)
    try:
        stream.sample(languages=languages)
    except KeyboardInterrupt:
        pass  # deliberate: Ctrl-C is the normal way to stop collecting
    finally:
        listener.file.close()
Exemple #45
0
class TwitterProducer(Producer):
    """Producer for the "twitter" topic, fed from the sample stream."""

    TOPIC = "twitter"

    def __init__(self, *args, **kwargs):
        """Initialise the base ``Producer``, then build listener and stream.

        Credentials are read from the ``twitter`` mapping (``ckey``,
        ``csecret``, ``atoken``, ``asecret``); the listener is constructed
        with ``self.queue``.
        """
        Producer.__init__(self, *args, **kwargs)
        handler = OAuthHandler(twitter.get("ckey"), twitter.get("csecret"))
        self.auth = handler
        handler.set_access_token(twitter.get("atoken"), twitter.get("asecret"))
        queue_listener = TwitterListener(self.queue)
        self.li = queue_listener
        self.stream = Stream(handler, queue_listener)

    def produce(self):
        """Run the sample stream restricted to English."""
        english_only = ["en"]
        self.stream.sample(languages=english_only)
Exemple #46
0
def senddata(c_socket):
    """
    Run the English sample stream with a ``Listener`` built around a socket.

    :param c_socket: passed to ``Listener``
    """
    print('start sending data from Twitter to socket')
    # authenticate with the module-level credentials
    handler = OAuthHandler(consumer_key, consumer_secret)
    handler.set_access_token(AccessToken, AccessSecret)
    # attach the socket-backed listener and start sampling
    socket_listener = Listener(c_socket)
    Stream(handler, socket_listener).sample(languages=["en"])
class TwitterPipe(GenericPipe):
    """Pipe that runs the sample stream with a ``TwitterListener``."""

    def __init__(self, consumer_key, consumer_secret,
                 access_token, access_token_secret,
                 callback_func
                 ):
        """Store the OAuth handler, listener and stream on the instance.

        Args:
            consumer_key: OAuth consumer key.
            consumer_secret: OAuth consumer secret.
            access_token: OAuth access token.
            access_token_secret: OAuth access token secret.
            callback_func: Passed to ``TwitterListener``.
        """
        super().__init__()
        handler = OAuthHandler(consumer_key, consumer_secret)
        self.auth = handler
        handler.set_access_token(access_token, access_token_secret)
        pipe_listener = TwitterListener(callback_func)
        self.listener = pipe_listener
        self.stream = Stream(handler, pipe_listener)

    def start(self):
        """Run the sample stream."""
        stream = self.stream
        stream.sample()
def stream_tweets():
    """Sample English tweets forever with a ``TopicListener`` attached.

    A new ``Stream`` is built from the module-level ``auth_handler`` for
    every attempt; any ``Exception`` is printed and followed by a new
    attempt.
    """
    listener = TopicListener()

    # start streaming
    while True:
        try:
            stream = Stream(auth_handler, listener)
            # Function-call form prints the same text on Python 2 and 3.
            print('Listening...')
            stream.sample(languages=['en'])
        except Exception as e:
            # reconnect on exceptions
            print(e)
            continue
def main():
    """Sample tweets into ``FILENAME`` forever, pausing 60 s after errors.

    Each pass builds a ``tweet_access.JsonStreamer`` and a ``Stream`` and
    calls ``sample`` with ``LANGUAGES``. Any ``Exception`` is logged with its
    traceback, the stream (if it was created) is disconnected, and the loop
    retries after 60 seconds.
    """
    logger = support.getLogger('tsv_writer')
    while True:
        # Reset per attempt so the handler never touches an unbound name
        # when the failure happens before the Stream has been created.
        stream = None
        try:
            data_streamer = tweet_access.JsonStreamer(filename=FILENAME)
            auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
            auth.set_access_token(ACCESS_TOKEN_KEY, ACCESS_TOKEN_SECRET)
            stream = Stream(auth, data_streamer)
            stream.sample(languages=LANGUAGES)
        except Exception as e:
            logger.exception(e)
            if stream is not None:
                stream.disconnect()
            time.sleep(60)
def main():
    """Sample English tweets and hand them to a writer thread until Ctrl-C.

    Configuration is loaded from ``CONFIG_FILE`` through ``staticconf``. A
    thread running ``worker(listener)`` is started before streaming begins.
    Socket/HTTP errors sleep for ``min(tcpip_delay, MAX_TCPIP_TIMEOUT)``
    seconds and raise the module-level ``tcpip_delay`` by 0.25. On exit the
    stream is disconnected, ``None`` is queued as the stop marker, and the
    queue and the writer thread are joined.
    """
    global tcpip_delay

    staticconf.YamlConfiguration(CONFIG_FILE)
    listener = QueueListener()

    read_setting = staticconf.read_string
    consumer_key = read_setting('twitter.consumer_key')
    consumer_secret = read_setting('twitter.consumer_secret')
    auth = OAuthHandler(consumer_key, consumer_secret)
    token = read_setting('twitter.access_token')
    token_secret = read_setting('twitter.access_token_secret')
    auth.set_access_token(token, token_secret)

    writer_thread = threading.Thread(target=worker, args=(listener,))
    writer_thread.start()

    stream = Stream(auth, listener)

    print_status(listener)

    try:
        while True:
            try:
                stream.sample(languages=['en'])  # blocks until the stream ends
            except KeyboardInterrupt:
                print('KEYBOARD INTERRUPT', file=sys.stderr)
                break
            except (socket.error, httplib.HTTPException):
                message = 'TCP/IP Error: Restarting after {delay} seconds.'.format(
                    delay=tcpip_delay,
                )
                print(message, file=sys.stderr)
                time.sleep(min(tcpip_delay, MAX_TCPIP_TIMEOUT))
                tcpip_delay += 0.25
    finally:
        print('Disconnecting stream', file=sys.stderr)
        stream.disconnect()
        print('Waiting for last tweets to finish processing', file=sys.stderr)
        # Queue the stop marker for the writer thread, then wait for the
        # queue to drain.
        listener.queue.put(None)
        listener.queue.join()
        print('Waiting for writer thread to finish', file=sys.stderr)
        writer_thread.join()
        print('Exit successful', file=sys.stderr)
Exemple #51
0
def stream_tweets():
    '''
    Connect to the Twitter API and sample English tweets forever.

    A ``TopicListener`` receives the data. A new ``Stream`` is built from
    the module-level ``auth_handler`` for every attempt; any ``Exception``
    is printed and followed by a new attempt.
    '''
    listener = TopicListener()

    # start streaming
    while True:
        try:
            stream = Stream(auth_handler, listener)
            # Function-call form prints the same text on Python 2 and 3.
            print('Listening...')
            stream.sample(languages=['en'])
        except Exception as e:
            # reconnect on exceptions
            print(e)
            continue
    def get_recent_tweets(self, count=15):
        """
        Returns the results gathered from the sample stream

        :param count: passed as the first positional argument to
            ``stream.sample``
        :return: ``listener.results`` once the stream is no longer running
        """
        import time  # local import: only needed for the polling wait below

        listener = StreamGatherer()
        stream = TweepyStream(self.auth, listener)
        stream.sample(count)

        # Poll with a short sleep instead of spinning, so waiting for the
        # stream to finish does not keep a CPU core busy.
        while stream.running:
            time.sleep(0.05)

        return listener.results
Exemple #53
0
class TwitterTrends(StreamListener):
    """Stream listener that hands hashtag-bearing tweets to subscribers.

    The instance creates its own ``Stream`` (authenticated with the
    module-level credentials) with itself as the listener. Each decoded
    message that carries at least one hashtag is wrapped in a ``Tweet`` and
    passed to every subscriber's ``on_tweet``. After a chunk of data has been
    processed with no subscribers registered, the stream is disconnected.
    """

    def __init__(self):
        # initiate the required authentication
        auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
        auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)

        self.stream = Stream(auth, self)

        self.subscribers = []

    def on_data(self, data):
        """Decode ``data`` and dispatch every tweet that has hashtags.

        ``data`` is split on ``"\\r\\n"`` and each non-blank fragment is
        parsed as JSON.
        """
        for entry in data.split("\r\n"):
            # A trailing or doubled "\r\n" produces empty fragments, which
            # json.loads would reject with an exception -- skip them.
            if not entry.strip():
                continue

            processed = json.loads(entry)

            # Only include tweets that have a "topic", in this case one or more hashtags
            if "entities" not in processed:
                continue

            hashtags = processed['entities']['hashtags']
            if len(hashtags) == 0:
                continue

            tags = [u"#" + t['text'].lower() for t in hashtags]
            user = processed['user']['screen_name']
            text = processed['text']
            # named created_at (not "time") to avoid shadowing the stdlib module name
            created_at = processed['created_at']

            tweet = Tweet(text, tags, created_at, user)

            for subscriber in self.subscribers:
                subscriber.on_tweet(tweet)

        # Nobody is listening any more: shut the stream down.
        if len(self.subscribers) == 0:
            self.stop()

    def on_error(self, error):
        """Print the error reported by the stream."""
        print(u"Error occurred: %s" % error)

    def add_subscriber(self, subscriber):
        """Register ``subscriber``; its ``on_tweet`` is called for each dispatched tweet."""
        self.subscribers.append(subscriber)

    def remove_subscriber(self, subscriber):
        """Unregister ``subscriber`` (``list.remove`` raises ``ValueError`` if it is absent)."""
        self.subscribers.remove(subscriber)

    def start(self):
        """Start sampling from the stream."""
        self.stream.sample()

    def stop(self):
        """Print a notice and disconnect the stream."""
        print('Stopped')
        self.stream.disconnect()
Exemple #54
0
    def get_streaming_data(self):
        """Sample the stream, then print and persist the gathered statistics.

        A ``twitter_listener`` configured from this object's settings is
        attached to a ``Stream`` and ``sample()`` is run; if it raises, the
        exception's ``__doc__`` is printed and processing carries on. The
        language counters and the number of top tweets from
        ``self.stats.get_stats()`` are printed, one row with both counters is
        inserted into ``lang_data``, one row per top tweet into ``twit_data``,
        and the connection is committed.
        """
        listener = twitter_listener(
            num_tweets_to_grab=self.num_tweets_to_grab,
            retweet_count=self.retweet_count,
            stats=self.stats,
            get_tweet_html=self.get_tweet_html,
        )
        twitter_stream = Stream(self.auth, listener)

        try:
            twitter_stream.sample()
        except Exception as err:
            print(err.__doc__)

        lang, top_lang, top_tweets = self.stats.get_stats()
        print(Counter(lang))
        print(Counter(top_lang))
        print(len(top_tweets))

        # Both counters are stored as the string form of their item lists.
        lang_row = (
            str(list(Counter(lang).items())),
            str(list(Counter(top_lang).items())),
        )
        self.c.execute("INSERT INTO lang_data VALUES (?,?, DATETIME('now'))", lang_row)

        for tweet_html in top_tweets:
            self.c.execute("INSERT INTO twit_data VALUES (?, DATETIME('now'))", (tweet_html,))

        self.conn.commit()
Exemple #55
0
def fetch_from_twitter():
    """fetch tweets from twitter

    Samples the stream with a ``twitter.Listener`` limited to
    ``settings.MAX_TWEETS``, then walks ``listener.buffer`` and saves a
    ``Tweet`` for every entry whose ``id`` is not stored yet. A failure to
    save one tweet is printed and does not stop the remaining ones.
    """
    print("fetch_from_twitter => BEGIN")
    auth = twitter.get_auth()
    listener = twitter.Listener(settings.MAX_TWEETS)
    stream = Stream(auth, listener)
    stream.sample()
    print("fetch_from_twitter => SAMPLED")
    for data in listener.buffer:
        # skip tweets that already have a stored row
        if Tweet.objects.filter(tweet_id=data['id']).count() != 0:
            continue

        tweet = Tweet(
            tweet_id=data['id'],
            text=data['text'],
            created_at=parser.parse(data['created_at']),
            username=data['user']['screen_name']
        )
        try:
            tweet.save()
        except Exception as e:
            # str(e) instead of the deprecated e.message, which is empty for
            # multi-argument exceptions and absent on Python 3
            print("Error saving tweet " + str(tweet) + "(" + str(e) + ")")
Exemple #56
0
def run_crawler(auth_file, dest_dir):
    """Run the sample-stream crawler forever.

    :param auth_file: path of a JSON file holding the keys ``consumer_key``,
        ``consumer_secret``, ``access_token`` and ``access_token_secret``
    :param dest_dir: base directory; its ``log`` and ``text`` sub-paths are
        handed to ``TweetListener``

    Any ``Exception`` raised by ``stream.sample()`` is printed and the call
    is retried after a short pause that grows with every failure (capped).
    """
    import time  # local import: only the retry pause needs it

    log_dir = os.path.join(dest_dir, "log")
    text_dir = os.path.join(dest_dir, "text")
    listener = TweetListener(log_dir, text_dir)

    # Context manager so the credentials file is closed right after parsing.
    with open(auth_file) as auth_fp:
        auth_info = json.load(auth_fp)
    consumer_key = auth_info["consumer_key"]
    consumer_secret = auth_info["consumer_secret"]
    access_token = auth_info["access_token"]
    access_token_secret = auth_info["access_token_secret"]

    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, listener)
    print("run at %s" % (datetime.utcnow(),))

    retry_delay = 0.25      # seconds to wait before the next attempt
    max_retry_delay = 16.0  # upper bound for that wait
    while True:
        try:
            stream.sample()
        except Exception as ex:
            print(str(ex))
            # pause before retrying so a persistent failure does not become
            # a tight loop hammering the API
            time.sleep(retry_delay)
            retry_delay = min(retry_delay + 0.25, max_retry_delay)
def main():
    """Stream sampled tweets into the CouchDB database named in ``settings``.

    Authenticates with the credentials from ``settings``, opens
    ``settings.database`` on a ``couchdbkit.Server`` and feeds a
    ``CouchDBStreamListener`` from ``stream.sample()`` forever. When sampling
    raises an ``Exception`` the error and the listener's running tweet count
    are printed, and sampling restarts after a short pause that grows with
    every failure (capped).
    """
    import time  # local import: only the retry pause needs it

    auth = OAuthHandler(settings.consumer_key,
                        settings.consumer_secret)
    auth.set_access_token(settings.access_token,
                          settings.access_secret)

    server = couchdbkit.Server()
    db = server[settings.database]

    listener = CouchDBStreamListener(db)

    stream = Stream(auth, listener)

    retry_delay = 0.25      # seconds to wait before the next attempt
    max_retry_delay = 16.0  # upper bound for that wait
    while True:
        # if listener.tweet_count > 100000:
        #     break
        try:
            # stream.userstream()
            stream.sample()
        except Exception as e:
            # Single formatted string: prints the same text as the former
            # print statement and works as a call on Python 2 and 3 alike.
            print('error:  %s' % (e,))
            print("Total tweets received: %d" % listener.tweet_count)
            # pause before retrying so a persistent failure does not become
            # a tight loop hammering the API
            time.sleep(retry_delay)
            retry_delay = min(retry_delay + 0.25, max_retry_delay)
def main():
    """Entry point: either run the conversion step or stream tweets.

    With ``options.convert`` set, ``TweetHelper(options.url).copy_from_raw()``
    is called and nothing else happens. Otherwise the Twitter credentials are
    read from the ``Twitter`` section of ``options.config`` (the process
    exits with status 1 if that file does not exist), a listener is chosen
    according to ``options.watch``, and the stream is either filtered by
    ``options.topics`` or sampled.
    """
    options = parse_arguments()

    # Conversion mode is self-contained: run it and finish.
    if options.convert:
        helper = TweetHelper(options.url)
        helper.copy_from_raw()
        return

    if not os.path.isfile(options.config):
        log("Config file not found: ", options.config)
        exit(1)

    config = RawConfigParser()
    config.read(options.config)

    # User credentials for the Twitter API, read in a fixed order
    credential_names = (
        "access_token",
        "access_token_secret",
        "consumer_key",
        "consumer_secret",
    )
    access_token, access_token_secret, consumer_key, consumer_secret = [
        config.get("Twitter", name) for name in credential_names
    ]

    # Pick the listener: watch-only or ingesting
    if options.watch:
        listener = WatchListener(options.max)
        log("Watching instead of storing tweets.")
    else:
        listener = IngestListener(options.max, options.url)

    # Twitter authentication and the connection to the Streaming API
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, listener)

    if not options.topics:
        log("Reading a random sample of tweets.")
        stream.sample()
    else:
        log("Tracking tweets with these topics: ", options.topics)
        stream.filter(track=options.topics)
def sampleTwitter(keywords):
    """Read from the Twitter stream, sampled or filtered by keywords.

    Credentials come from the ``Twitter`` section of ``conf/config.file``.

    :param keywords: whitespace-separated terms; when its length is zero the
        plain sample is read, otherwise the terms are passed to
        ``stream.filter`` as its second positional argument
    """
    section = 'Twitter'
    config = ConfigParser.ConfigParser()
    config.read('conf/config.file')

    consumer_key = config.get(section, 'consumer_key')
    consumer_secret = config.get(section, 'consumer_secret')
    access_key = config.get(section, 'token_key')
    access_secret = config.get(section, 'token_secret')

    stream_listener = SampleStreamListener()
    handler = OAuthHandler(consumer_key, consumer_secret)
    handler.set_access_token(access_key, access_secret)

    stream = Stream(handler, stream_listener)

    if len(keywords) != 0:
        terms = [word.strip() for word in keywords.split()]
        stream.filter(None, terms)
    else:
        stream.sample()

    print('DONE READING SAMPLE TWITTER')