def start_stream(auth, l):
    """Keep a sample stream running, reconnecting whenever it stops.

    Args:
        auth: Authentication handler handed to ``Stream``.
        l: Listener object handed to ``Stream``.

    The loop has no exit condition of its own: a fresh ``Stream`` is built
    and ``sample()`` is called again after every return or error.
    """
    while True:
        try:
            stream = Stream(auth, l)
            stream.sample()
        except Exception:
            # Best-effort reconnect on any streaming error.  Catching
            # ``Exception`` instead of using a bare ``except`` lets
            # KeyboardInterrupt and SystemExit propagate, so the loop can
            # actually be stopped.
            continue
def _get_tweets(self):
    """Start an English-only sample stream using the OAuth values in ``self.conf``.

    A ``QueueListener`` wrapping ``self.queue`` is attached as the stream
    listener.
    """
    settings = self.conf
    handler = OAuthHandler(settings['oauth.consumer_key'],
                           settings['oauth.consumer_secret'])
    handler.set_access_token(settings['oauth.access_token'],
                             settings['oauth.access_token_secret'])
    Stream(handler, QueueListener(self.queue)).sample(languages=['en'])
def main():
    """Collect Japanese tweets until interrupted from the keyboard.

    Opens a stream on a ``QueueListener``, issues a Japanese keyword filter,
    then calls ``stream.sample()`` in a loop.  TCP/HTTP errors are retried
    after a delay that grows by 0.25 s per failure (the sleep itself is
    capped at ``MAX_TCPIP_TIMEOUT``).  The stream is always disconnected on
    the way out.
    """
    global tcpip_delay

    # open stream
    listener = QueueListener()
    stream = Stream(listener.auth, listener, language='ja')

    # [stream filter]
    # Other filters tried at some point: bounding boxes for San Francisco
    # ([-122.75,36.8,-121.75,37.8]) and New York City ([-74,40,-73,41]), or
    # English keyword tracking.
    track_terms = [
        '私', 'あなた', '俺', 'ー', 'する', 'です', 'ます', 'けど', '何',
        '@', '#', '#', '。', ',', '!', '?', '…', '.', '!', '?', ',', ':',
        ':', '』', ')', ')', '...'
    ]
    stream.filter(languages=["ja"], track=track_terms)

    try:
        while True:
            try:
                stream.sample()  # blocking!
            except KeyboardInterrupt:
                print('KEYBOARD INTERRUPT')
                return
            except (socket.error, http.client.HTTPException):
                print('TCP/IP Error: Restarting after %.2f seconds.'
                      % tcpip_delay)
                time.sleep(min(tcpip_delay, MAX_TCPIP_TIMEOUT))
                tcpip_delay += 0.25
    finally:
        stream.disconnect()
        print('Exit successful, corpus dumped in %s' % (listener.dumpfile))
def get_streaming_data(self):
    """Stream sample tweets until enough are grabbed, then print and store stats.

    A new ``Stream`` is created for every pass of the loop; the loop ends
    once ``self.s.get_tweets_grabbed()`` reaches ``self.num_tweets_to_grab``.
    Afterwards the statistics from ``self.s.get_stats()`` are printed and
    inserted through the cursor ``self.c``, and ``self.conn`` is committed.
    """
    def counted(values):
        # Text form of the (item, count) pairs, as written to the database.
        return str(list(Counter(values).items()))

    tweets_grabbed = 0
    while tweets_grabbed < self.num_tweets_to_grab:
        stream = Stream(
            self.auth,
            listener(self.s, self.twit_utils, self.num_tweets_to_grab,
                     self.retweet_count))
        try:
            stream.sample()
        except Exception as err:
            print("Error. Restarting Stream.... Error: ")
            print(err.__doc__)
            print("Le Error! Restart")
            time.sleep(3)  # short pause (3 seconds) before reconnecting
        finally:
            # Refresh the progress counter whether or not the stream failed.
            tweets_grabbed = self.s.get_tweets_grabbed()
            print("tweets_grabbed = ", tweets_grabbed)

    (lang, top_lang, love_words,
     swear_words, top_tweets, countries) = self.s.get_stats()

    print(Counter(lang))
    print(Counter(top_lang))
    print("Love Words {} Swear Words {}".format(love_words, swear_words))
    print(Counter(countries))

    cursor = self.c
    cursor.execute("INSERT INTO lang_data VALUES (?,?, DATETIME('now'))",
                   (counted(lang), counted(top_lang)))
    cursor.execute("INSERT INTO love_data VALUES (?,?, DATETIME('now'))",
                   (love_words, swear_words))
    for tweet in top_tweets:
        cursor.execute("INSERT INTO twit_data VALUES (?, DATETIME('now'))",
                       (tweet,))
    cursor.execute("INSERT INTO country_data VALUES (?, DATETIME('now'))",
                   (counted(countries),))
    self.conn.commit()
def main():
    """Stream sample tweets into a Postgres table, reconnecting on errors.

    Each pass of the loop opens a connection via ``utils.connect('ng')``,
    makes sure ``TABLENAME`` exists, and starts a sample stream that feeds a
    ``tweet.PostgresStreamer``.  KeyboardInterrupt ends the loop; the listed
    network/index errors trigger a reconnect after 90 seconds.
    """
    while True:
        # Reset per attempt so the handlers can tell whether a stream was
        # created before the failure (previously this could raise
        # UnboundLocalError, e.g. when the DB connection itself failed).
        stream = None
        try:
            with utils.connect('ng') as conn:
                with conn.cursor() as cur:
                    cur.execute(
                        """CREATE TABLE IF NOT EXISTS {tablename}(
                        id_str text PRIMARY KEY,
                        source text,
                        user_id text,
                        created_at timestamp,
                        text text)""".format(tablename=TABLENAME))
                data_streamer = tweet.PostgresStreamer(conn=conn,
                                                       tablename=TABLENAME)
                auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
                auth.set_access_token(ACCESS_TOKEN_KEY, ACCESS_TOKEN_SECRET)
                stream = Stream(auth, data_streamer)
                stream.sample(languages=LANGUAGES)
                conn.commit()
        except KeyboardInterrupt:
            if stream is not None:
                stream.disconnect()
            break
        except (IndexError, ConnectionError, ProtocolError, ReadTimeoutError):
            if stream is not None:
                stream.disconnect()
            # Wait before the next reconnect attempt.
            time.sleep(90)
def main():
    """Connects to the stream and starts threads to write them to a file.

    A writer thread running ``worker(listener)`` is started, then
    ``stream.sample()`` is called in a loop.  TCP/HTTP errors are retried
    after a delay that grows by 0.25 s per failure (the sleep is capped at
    ``MAX_TCPIP_TIMEOUT``); KeyboardInterrupt ends the loop.

    Shutdown (disconnect, poison pill, joining the writer thread) happens
    exactly once, when the loop is left.  Previously the ``finally`` clause
    belonged to the per-iteration ``try``, so the writer thread was stopped
    after the first TCP error while the loop went on streaming.
    """
    global tcpip_delay

    listener = QueueListener()
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)

    writer_thread = threading.Thread(target=worker, args=(listener, ))
    writer_thread.start()

    stream = Stream(auth, listener)

    print_status(listener)

    try:
        while True:
            try:
                stream.sample()  # blocking!
            except KeyboardInterrupt:
                print('KEYBOARD INTERRUPT:')
                return
            except (socket.error, httplib.HTTPException):
                print('TCP/IP Error: Restarting after '
                      '{} seconds.'.format(tcpip_delay))
                time.sleep(min(tcpip_delay, MAX_TCPIP_TIMEOUT))
                tcpip_delay += 0.25
    finally:
        print('Disconnecting stream')
        stream.disconnect()
        print('Waiting for last tweets to finish processing')
        # Send poison pill to writer thread and wait for it to exit
        listener.queue.put(None)
        listener.queue.join()
        print('Waiting for writer thread to finish')
        writer_thread.join()
        print('Exit successful')
def main():
    """Listen to the sample stream and store conversations in a dated database.

    The database path is built from the home directory and today's date.
    Unless ``--new`` is given a value other than -1, ``make_db`` is called
    for that path first.  Protocol and read-timeout errors restart the
    stream immediately; socket/HTTP errors wait ``delay`` seconds (growing
    by 0.25 per failure) before retrying.  The stream is disconnected on exit.
    """
    # Replace {path} with whatever suits your own environment.
    stamp = date.today().strftime("20%y.%m.%d")
    db_path = "".join([str(Path.home()), "{path}/Conversation.", stamp, ".db"])

    parser = argparse.ArgumentParser()
    parser.add_argument('--new',
                        type=int,
                        default=-1,
                        help='-1 indicates new database...')
    args = parser.parse_args()
    if args.new == -1:
        make_db(db_path)

    listener = QueueListener(db_path)
    stream = Stream(listener.auth, listener)
    print("Listening...\n")

    delay = 0.25
    try:
        while True:
            try:
                stream.sample()
            except KeyboardInterrupt:
                print('Stopped')
                return
            except urllib3.exceptions.ProtocolError as err:
                print("Incomplete read", err)
            except urllib3.exceptions.ReadTimeoutError as err:
                print("Read Timeout", err)
            except (socket.error, http.client.HTTPException):
                print("HTTP error waiting for a few seconds")
                time.sleep(delay)
                delay += 0.25
    finally:
        stream.disconnect()
def interactive(username=None, password=None, filenames=None):
    """Prompt for any missing credentials and start a sample stream.

    Args:
        username: Account name; asked for on the console when empty.
        password: Account password; asked for via ``getpass`` when empty.
        filenames: Accepted but not used by this function.
    """
    username = username or raw_input('Username: ').strip()
    password = password or getpass("Password: ").strip()
    Stream(username, password, TWSSBuildClassifierListner()).sample()
def start_stream():
    """Run the sample stream forever, reconnecting after any error.

    See https://dev.twitter.com/streaming/reference/get/statuses/sample for
    the endpoint that ``sample()`` targets.  KeyboardInterrupt disconnects
    the current stream (if one was created) and ends the loop.
    """
    while True:
        # Reset per attempt so the interrupt handler never touches an
        # unbound name when the failure happens before Stream() is built.
        twitterStream = None
        try:
            listener = Streamlistener()
            twitterStream = Stream(auth, listener)
            twitterStream.sample()
        except KeyboardInterrupt:
            # Or however you want to exit this loop
            if twitterStream is not None:
                twitterStream.disconnect()
            break
        except Exception as e:
            # Oh well, reconnect and keep trucking
            print('Generic exception happened. Continuing.. ' + str(e))
def stream_tweets(self):
    """Start a sample stream in extended tweet mode on a ``StdOutListener``.

    Credentials are read from the ``config`` object.
    """
    out_listener = StdOutListener()
    handler = OAuthHandler(config.api_key, config.api_secret_key)
    handler.set_access_token(config.access_token, config.access_token_secret)
    Stream(handler, out_listener, tweet_mode='extended').sample()
class streamsample(StreamListener):
    """Wraps a sample stream that can be printed or pushed to sqlite."""

    # constructor for stream
    def __init__(self):
        l = StdOutListener()
        auth = OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(access_token, access_token_secret)
        self.stream = Stream(auth, l)

    def printtest(self):
        """Start the sample stream on the ``StdOutListener`` set up in ``__init__``."""
        self.stream.sample()

    # Sends tweet to the sqlite3 database
    def todatabase(self, numtweets=100):
        """Hand up to ``numtweets`` English tweets to ``parsetweet``.

        :param numtweets: number of passes over the stream; each pass stops
            at the first item that has text and an English-language user.
        """
        # NOTE(review): this iterates ``self.stream`` directly and expects
        # dict-like items - confirm the Stream object supports that.
        connection = sqlite3.connect(DATABASE_NAME)
        try:
            count = 0
            while count < numtweets:
                for tweet_json in self.stream:
                    # check whether the tweet is actually a tweet
                    # (was ``tweet``, a name this method never defines)
                    if (tweet_json.get('text')
                            and tweet_json['user']['lang'] == 'en'):
                        parsetweet(tweet_json)
                        break
                count += 1
        except stream.ConnectionError:
            print("Disconnected")
        finally:
            # The connection used to be left open on every path.
            connection.close()
class TwitterIngestor():
    """
    Ingests data from Twitter
    """

    def __init__(self, twitter_config, callback):
        """
        Builds the OAuth handler and the Twitter stream

        :param twitter_config: mapping providing 'customer_key',
            'customer_secret', 'access_token' and 'access_secret'
        :param callback: object handed to ``TwitterListener``
        """
        self.callback = callback
        oauth = OAuthHandler(twitter_config['customer_key'],
                             twitter_config['customer_secret'])
        self.auth = oauth
        oauth.set_access_token(twitter_config['access_token'],
                               twitter_config['access_secret'])
        self.twitter_stream = Stream(oauth, TwitterListener(callback))
        logger.info('Initialized Twitter Ingestor')

    def start(self):
        """
        Starts the English sample stream with ``is_async=True``

        :return: None
        """
        logger.info('Starting Twitter feed ingestion')
        sample_options = {'is_async': True, 'languages': ['en']}
        self.twitter_stream.sample(**sample_options)
        logger.info('Started Twitter feed ingestion')

    def stop(self):
        """
        Disconnects the Twitter stream

        :return: None
        """
        logger.info('Stopping Twitter feed ingestion')
        self.twitter_stream.disconnect()
        logger.info('Stopped Twitter feed ingestion')
def print_to_terminal():
    """Stream English-language sample tweets to a ``PrintListener``.

    The former trailing ``stream.sample()`` call (no language restriction)
    was removed: it would only run after the English stream had ended and
    contradicted the English-only intent stated here.
    """
    listener = PrintListener()
    stream = Stream(auth, listener)

    # Tweets in English Language Only
    languages = ('en',)
    stream.sample(languages=languages)
def printStream():
    """Authenticate and start a sample stream on a ``StdOutListener``."""
    # Handles Twitter authentication and the connection to the Twitter
    # Streaming API.
    out_listener = StdOutListener()
    handler = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
    handler.set_access_token(ACCESS_KEY, ACCESS_SECRET)
    Stream(handler, out_listener).sample()
def get_streaming_data(self):
    """Run one sample stream, then print and store the collected statistics.

    Any exception raised by the stream is reported via its class docstring
    and otherwise ignored.  Language counts and top tweets from
    ``self.stats.get_stats()`` are inserted through ``self.c`` and committed
    on ``self.conn``.
    """
    def counted(values):
        # Text form of the (item, count) pairs, as written to the database.
        return str(list(Counter(values).items()))

    stream = Stream(
        self.auth,
        twitter_listener(num_tweets_to_grab=self.num_tweets_to_grab,
                         retweet_count=self.retweet_count,
                         stats=self.stats,
                         get_tweet_html=self.get_tweet_html))
    try:
        stream.sample()
    except Exception as err:
        print(err.__doc__)

    lang, top_lang, top_tweets = self.stats.get_stats()
    print(Counter(lang))
    print(Counter(top_lang))
    print(len(top_tweets))

    cursor = self.c
    cursor.execute("INSERT INTO lang_data VALUES (?,?, DATETIME('now'))",
                   (counted(lang), counted(top_lang)))
    for tweet in top_tweets:
        cursor.execute("INSERT INTO twit_data VALUES (?, DATETIME('now'))",
                       (tweet, ))
    self.conn.commit()
def run_sample():
    """Authenticate and start a sample stream on a ``TweetListener``."""
    tweet_listener = TweetListener()
    handler = OAuthHandler(consumer_key, consumer_secret)
    handler.set_access_token(access_token, access_token_secret)
    Stream(handler, tweet_listener).sample()
def sample():
    """Call the sampling API provided by Twitter.

    Tweets are delivered to a ``TweetStreamListener``.
    """
    receiver = TweetStreamListener()
    handler = OAuthHandler(consumer_key, consumer_secret)
    handler.set_access_token(access_token, access_secret)
    Stream(handler, receiver).sample()
def get_streaming_data(self):
    """Run one sample stream on a ``twitter_listener``.

    Any exception raised by the stream is reported by printing the
    exception class docstring; nothing is re-raised.
    """
    listener_options = {
        'num_tweets_to_grab': self.num_tweets_to_grab,
        'retweet_count': self.retweet_count,
    }
    stream = Stream(self.auth, twitter_listener(**listener_options))
    try:
        stream.sample()
    except Exception as err:
        print(err.__doc__)
def main():
    """Authenticate and start a sample stream on ``Listener(100000)``."""
    collector = Listener(100000)
    handler = OAuthHandler(consumer_key, consumer_secret)
    handler.set_access_token(access_token, access_token_secret)
    Stream(handler, collector).sample()
def sample():
    """Get random sample of all public statuses.

    Statuses are delivered to a ``TweetStreamListener``.
    """
    receiver = TweetStreamListener()
    handler = OAuthHandler(consumer_key, consumer_secret)
    handler.set_access_token(access_token, access_secret)
    Stream(handler, receiver).sample()
def save_tweets():
    """Stream sample tweets for 'en,fa' into a ``DatabaseListener`` (150 tweets)."""
    print('hi')
    # The JSON path is computed but not passed on to anything in this function.
    filepath = path.join(_get_dir_absolute_path(), 'tweets.json')
    db_listener = DatabaseListener(number_tweets_to_save=150)
    Stream(auth, db_listener).sample(languages=('en,fa',))
def get_tweets():
    """Stream tweets to an ``INDXListener``.

    When ``args['words']`` is non-empty it is split on commas and used as
    the track filter; otherwise the plain sample stream is used.
    """
    indx_listener = INDXListener()
    handler = OAuthHandler(consumer_key, consumer_secret)
    handler.set_access_token(access_token, access_token_secret)
    stream = Stream(handler, indx_listener)

    words = args['words']
    if len(words) == 0:
        stream.sample()
    else:
        stream.filter(track=words.split(","))
def _get_tweets(self):
    """Open an English sample stream that feeds ``self.queue``.

    OAuth credentials come from the ``oauth.*`` entries of ``self.conf``; the
    listener is a ``QueueListener`` built around ``self.queue``.
    """
    conf = self.conf
    oauth = OAuthHandler(conf['oauth.consumer_key'],
                         conf['oauth.consumer_secret'])
    oauth.set_access_token(conf['oauth.access_token'],
                           conf['oauth.access_token_secret'])
    queue_listener = QueueListener(self.queue)
    Stream(oauth, queue_listener).sample(languages=['en'])
class TweetDownloader(safethread.SafeThread, StreamListener):
    """Buffers streamed tweets and writes them to time-windowed files.

    Tweets are queued through ``write`` and flushed one per ``action`` call
    into ``<destpath>tmp``.  Once the current window (``window``
    milliseconds) has elapsed, the temp file is renamed to
    ``<destpath>tweets-<window start ms>.txt`` and a new temp file is opened.
    """

    def __init__(self, destpath, consumer_key, consumer_secret, access_token,
                 access_secret, window=10000, verbose=False):
        """Store credentials and output settings; nothing is opened yet.

        ``destpath`` is used as a plain string prefix for file names, so it
        needs its trailing path separator.  ``verbose`` is accepted but not
        used by this class.
        """
        super(TweetDownloader, self).__init__(name="TweetDownloader")
        self.destpath = destpath
        self.consumer_key = consumer_key
        self.consumer_secret = consumer_secret
        self.access_token = access_token
        self.access_secret = access_secret
        self.prefix = 'tweets'
        self.suffix = 'txt'
        self.window = window
        self.buf = collections.deque()
        self.stopped = True

    # Write the tweet text to the current file. May throw an error if the file
    # is currently being switched out (i.e. writing at the end of a window).
    def write(self, vals):
        """Queue ``vals`` (JSON-encoded) for the next ``action`` call."""
        self.buf.appendleft(json.dumps(vals))

    def action(self):
        """Write one buffered tweet and rotate the file when the window ends."""
        if self.buf:
            self.f.write(self.buf.pop() + '\n')
        if (time.time() * 1000) - self.begin > self.window:
            self.f.close()
            fname = (self.destpath + self.prefix + '-' + str(self.begin) +
                     '.' + self.suffix)
            os.rename(self.destpath + 'tmp', fname)
            self.begin = int(time.time() * 1000)
            self.f = open(self.destpath + 'tmp', 'w')

    def start(self):
        """Open the stream and the first temp file, then start the thread."""
        # Setup the stream
        auth = OAuthHandler(self.consumer_key, self.consumer_secret)
        auth.set_access_token(self.access_token, self.access_secret)
        self.stream = Stream(auth, TweetListener(self))
        # Create the first file
        self.begin = int(time.time() * 1000)
        self.f = open(self.destpath + 'tmp', 'w')
        # Start the threads.  ``async`` is a reserved word from Python 3.7
        # on, so the keyword is spelled ``is_async`` (tweepy >= 3.7).
        self.stream.sample(is_async=True)
        super(TweetDownloader, self).start()

    def stop(self):
        """Disconnect the stream and stop the thread."""
        self.stream.disconnect()
        super(TweetDownloader, self).stop()
def get_streaming_data(self):
    """Start a sample stream feeding a ``twitter_listener``.

    Exceptions from the stream are not propagated; only the docstring of
    the exception class is printed.
    """
    stream = Stream(
        self.auth,
        twitter_listener(num_tweets_to_grab=self.num_tweets_to_grab,
                         retweet_count=self.retweet_count),
    )
    try:
        stream.sample()
    except Exception as failure:
        print(failure.__doc__)
def main():
    """Authenticate and start the unfiltered sample stream on a ``StdOutListener``."""
    out_listener = StdOutListener()
    handler = OAuthHandler(consumer_key, consumer_secret)
    handler.set_access_token(access_token, access_token_secret)
    # No keyword filter is applied here: sample() is called without arguments.
    Stream(handler, out_listener).sample()
def stream_tweets_sample(self):
    """Sample English tweets until ``self.listener.count`` reaches zero.

    ``ProtocolError`` and ``AttributeError`` raised by the stream are
    ignored and the stream is simply started again.
    """
    stream = Stream(self.authenticator.authenticate_app(), self.listener)
    while self.listener.count != 0:
        try:
            stream.sample(languages=['en'], stall_warnings=True)
        except (ProtocolError, AttributeError):
            # Retry: the loop condition decides whether to go on.
            pass
def twitter_sampling(time, name):
    """Start a sample stream whose listener writes to a name-specific file.

    Args:
        time: Passed to ``StdOutListener`` as ``time_limit``.
        name: Inserted into the output file name
            ``twitter_data_<name>.txt``.
    """
    # Handles Twitter authentication and the connection to the Twitter
    # Streaming API.
    output_name = 'twitter_data_' + name + '.txt'
    stream_listener = StdOutListener(time_limit=time, file_name=output_name)
    handler = OAuthHandler(consumer_key, consumer_secret)
    handler.set_access_token(access_token, access_token_secret)
    # Unfiltered sample: no keywords are tracked here.
    Stream(handler, stream_listener).sample()
def main():
    """Start a sample stream on a ``StdOutListener`` and report any failure.

    Every exception is caught, printed, and followed by "Connection Error".
    """
    try:
        out_listener = StdOutListener()
        # ``auth`` is expected to be set up elsewhere in the module.
        random_stream = Stream(auth, out_listener)
        # sample() delivers random tweets
        random_stream.sample()
    except Exception as err:
        print(err)
        print("Connection Error")
def save_tweets():
    """Stream English sample tweets into ``tweets.json`` via a ``DatabaseListener``.

    The listener is asked to save 200 tweets.  Ctrl-C stops the stream
    quietly, as before.  The listener's file is now closed on every exit
    path, not only after a KeyboardInterrupt.
    """
    directory = _get_dir_absolute_path()
    filepath = path.join(directory, 'tweets.json')
    listener = DatabaseListener(number_tweets_to_save=200, filepath=filepath)
    stream = Stream(auth, listener)
    languages = ('en', )
    try:
        stream.sample(languages=languages)
    except KeyboardInterrupt:
        # Expected way to stop early; nothing else to do.
        pass
    finally:
        # NOTE(review): assumes ``listener.file`` exists from construction
        # and tolerates a repeated close() - confirm against DatabaseListener.
        listener.file.close()
def _start_tweeter_stream(self):
    """Starts getting tweets.

    A ``StdOutListener`` built around ``self._tweets_from_stream`` receives
    the sample stream; the queue size is printed once ``sample()`` returns.
    """
    queue_listener = StdOutListener(self._tweets_from_stream)
    handler = OAuthHandler(consumer_key, consumer_secret)
    handler.set_access_token(access_token, access_token_secret)
    Stream(handler, queue_listener).sample()
    print('queue size: {}'.format(self._tweets_from_stream.qsize()))
class MyStreamer:
    """Owns a Stream wired to the given listener."""

    def __init__(self, listener):
        """Authenticate with the ``cred`` values and build the stream.

        :param listener: listener object handed to ``Stream``; also kept on
            ``self.listener``.
        """
        handler = OAuthHandler(cred.CONSUMER_KEY, cred.CONSUMER_SECRET)
        handler.set_access_token(cred.ACCESS_TOKEN, cred.ACCESS_SECRET)
        client = API(handler,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)
        self.listener = listener
        self.stream = Stream(auth=client.auth, listener=listener)

    def stream_tweets(self):
        """Start the English sample stream with ``is_async=True``."""
        sample_options = {'languages': ['en'], 'is_async': True}
        self.stream.sample(**sample_options)
def get_streaming_data(self):
    """Run one sample stream, then print the collected statistics.

    Exceptions raised by the stream are reduced to a printout of the
    exception class docstring.  Afterwards the language counters and the
    top tweets from ``self.stats.get_stats()`` are printed.
    """
    listener_options = dict(num_tweets_to_grab=self.num_tweets_to_grab,
                            retweet_count=self.retweet_count,
                            stats=self.stats,
                            get_tweet_html=self.get_tweet_html)
    stream = Stream(self.auth, twitter_listener(**listener_options))
    try:
        stream.sample()
    except Exception as err:
        print(err.__doc__)

    lang, top_lang, top_tweets = self.stats.get_stats()
    for summary in (Counter(lang), Counter(top_lang), top_tweets):
        print(summary)
def buffered_stream_tweets(self, num_tweets, subscriber) -> None:
    """Stream tweets into a ``BufferedTweepyListener`` and wait for its threads.

    Args:
        num_tweets: Passed to the listener as ``num_tweets``.
        subscriber: Passed to the listener as ``subscriber``.
    """
    buffered = BufferedTweepyListener(num_tweets=num_tweets,
                                      subscriber=subscriber)
    stream = Stream(self.auth, buffered)
    # NOTE(review): filter() is given a language but no track/follow/locations
    # and is followed by an unrestricted sample(); possibly
    # sample(languages=["en"]) was intended - confirm.
    stream.filter(languages=["en"])
    stream.sample()
    for worker_thread in buffered.threads:
        worker_thread.join()
def start_streaming(self):
    """Sample English tweets forever, using this object as the listener.

    Any ``Exception`` is printed and followed by a reconnect with a fresh
    ``Stream``.
    """
    while True:
        try:
            active_stream = Stream(self.auth_handler, self)
            print('Listening...')
            active_stream.sample(languages=['en'])
        except Exception as err:
            # reconnect on exceptions: fall through to the next iteration
            print(err)
class Twitter():
    """Thin wrapper around a Stream bound to a ``TwitterStreamListener``."""

    def __init__(self, conn):
        """Authenticate and build the stream.

        :param conn: object handed to ``TwitterStreamListener``.
        """
        self.auth = OAuthHandler(CONSUMER_API_KEY, CONSUMER_API_SECRET)
        self.auth.set_access_token(ACCESS_TOKEN_KEY, ACCESS_TOKEN_SECRET)
        self.stream = Stream(self.auth, TwitterStreamListener(conn))

    def filter(self, array_of_track=None):
        """Start a filtered stream tracking the given terms.

        :param array_of_track: terms passed as ``track``; an empty list is
            used when omitted.  The default is ``None`` rather than a
            shared mutable ``[]``.
        """
        track = [] if array_of_track is None else array_of_track
        self.stream.filter(track=track)

    def sample(self):
        """Start the unfiltered sample stream."""
        self.stream.sample()
def main():
    """Collect tweets for the language chosen with ``--lang`` until interrupted.

    A language-specific filter is issued first (bounding boxes for 'en',
    keyword/character tracking for 'zh' and 'ja'; nothing for other values),
    then ``stream.sample()`` is called in a loop.  TCP/HTTP errors are
    retried after a delay that grows by 0.25 s per failure (the sleep is
    capped at ``MAX_TCPIP_TIMEOUT``).  The stream is disconnected on exit.
    """
    global tcpip_delay

    # parser
    parser = argparse.ArgumentParser()
    parser.add_argument('--lang',
                        type=str,
                        required=True,
                        help='language: en/zh/ja')
    args = parser.parse_args()

    # open stream
    listener = QueueListener(args)
    stream = Stream(listener.auth, listener)

    # [stream filter]
    # Terms shared by the Chinese and Japanese filters.
    common_terms = [
        'I', 'you', 'http', 'www', 'co', '@', '#', '。', ',', '!', '.', '!',
        ',', ':', ':', '』', ')', '...'
    ]
    filters = {
        # San Francisco or New York City
        'en': {'locations': [-122.75, 36.8, -121.75, 37.8, -74, 40, -73, 41]},
        'zh': {
            'languages': ["zh"],
            'track': common_terms + ['我', '你', '他', '哈', '的', '是', '人',
                                     '-', '/'],
        },
        'ja': {'languages': ["ja"], 'track': list(common_terms)},
    }
    if args.lang in filters:
        stream.filter(**filters[args.lang])

    try:
        while True:
            try:
                stream.sample()  # blocking!
            except KeyboardInterrupt:
                print('KEYBOARD INTERRUPT')
                return
            except (socket.error, http.client.HTTPException):
                print('TCP/IP Error: Restarting after %.2f seconds.'
                      % tcpip_delay)
                time.sleep(min(tcpip_delay, MAX_TCPIP_TIMEOUT))
                tcpip_delay += 0.25
    finally:
        stream.disconnect()
        print('Exit successful, corpus dumped in %s' % (listener.dumpfile))
def start_streaming(self):
    """Keep an English sample stream alive with ``self`` as the listener.

    Each pass builds a new ``Stream``; any ``Exception`` is printed and the
    loop reconnects.
    """
    sample_options = {'languages': ['en']}
    while True:
        try:
            current = Stream(self.auth_handler, self)
            print('Listening...')
            current.sample(**sample_options)
        except Exception as failure:
            # reconnect on exceptions
            print(failure)
def start_tweets_api():
    """Sample English tweets forever on a ``TweetListener``.

    The same ``Stream`` object is reused; any ``Exception`` is printed and
    ``sample()`` is called again.
    """
    tweet_listener = TweetListener()
    handler = OAuthHandler(consumer_key, consumer_secret)
    handler.set_access_token(access_token, access_token_secret)
    stream = Stream(handler, tweet_listener)
    while True:
        try:
            stream.sample(languages=['en'])
        except Exception as err:
            print(str(err))
def get_random_sample(self, size):
    """Collect tweets from the sample stream through a ``SampleStreamListener``.

    :param size: value handed to ``SampleStreamListener``.
    :return: the result of the listener's ``get_sample()`` once
        ``sample()`` has returned.
    """
    handler = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
    handler.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
    collector = SampleStreamListener(size)
    Stream(handler, collector).sample()
    return collector.get_sample()
def save_tweets():
    """Stream English sample tweets into ``tweets.json`` via a ``DatabaseListener``.

    The listener is asked to save 1000 tweets.  Ctrl-C stops the stream
    quietly, as before.  The listener's file is now closed on every exit
    path, not only after a KeyboardInterrupt.
    """
    directory = _get_dir_absolute_path()
    filepath = path.join(directory, 'tweets.json')
    listener = DatabaseListener(number_tweets_to_save=1000, filepath=filepath)
    stream = Stream(auth, listener)
    languages = ('en',)
    try:
        stream.sample(languages=languages)
    except KeyboardInterrupt:
        # Expected way to stop early; nothing else to do.
        pass
    finally:
        # NOTE(review): assumes ``listener.file`` exists from construction
        # and tolerates a repeated close() - confirm against DatabaseListener.
        listener.file.close()
class TwitterProducer(Producer):
    """Producer that runs the English sample stream for the "twitter" topic."""

    TOPIC = "twitter"

    def __init__(self, *args, **kwargs):
        """Initialise the base producer, then build auth, listener and stream.

        Credentials are read from the ``twitter`` mapping ("ckey",
        "csecret", "atoken", "asecret"); the listener wraps ``self.queue``.
        """
        Producer.__init__(self, *args, **kwargs)
        oauth = OAuthHandler(twitter.get("ckey"), twitter.get("csecret"))
        self.auth = oauth
        oauth.set_access_token(twitter.get("atoken"), twitter.get("asecret"))
        queue_listener = TwitterListener(self.queue)
        self.li = queue_listener
        self.stream = Stream(oauth, queue_listener)

    def produce(self):
        """Start the sample stream restricted to English."""
        self.stream.sample(languages=["en"])
def senddata(c_socket):
    """Start an English sample stream whose listener is given ``c_socket``.

    :param c_socket: object handed to ``Listener``.
    """
    print('start sending data from Twitter to socket')
    # authenticate with the module-level credentials
    handler = OAuthHandler(consumer_key, consumer_secret)
    handler.set_access_token(AccessToken, AccessSecret)
    # start the Streaming API sample with the socket-backed listener
    Stream(handler, Listener(c_socket)).sample(languages=["en"])
class TwitterPipe(GenericPipe):
    """Pipe that feeds sample-stream tweets to a callback-based listener."""

    def __init__(self, consumer_key, consumer_secret, access_token,
                 access_token_secret, callback_func):
        """Build the OAuth handler, the listener and the stream.

        :param consumer_key: OAuth consumer key.
        :param consumer_secret: OAuth consumer secret.
        :param access_token: OAuth access token.
        :param access_token_secret: OAuth access token secret.
        :param callback_func: object handed to ``TwitterListener``.
        """
        super().__init__()
        oauth = OAuthHandler(consumer_key, consumer_secret)
        self.auth = oauth
        oauth.set_access_token(access_token, access_token_secret)
        tweet_listener = TwitterListener(callback_func)
        self.listener = tweet_listener
        self.stream = Stream(oauth, tweet_listener)

    def start(self):
        """Start the unfiltered sample stream."""
        self.stream.sample()
def stream_tweets():
    """Sample English tweets forever on a single ``TopicListener``.

    A new ``Stream`` is created for every connection attempt; any
    ``Exception`` is printed and followed by a reconnect.
    """
    topic_listener = TopicListener()
    # start streaming
    while True:
        try:
            current = Stream(auth_handler, topic_listener)
            print('Listening...')
            current.sample(languages=['en'])
        except Exception as err:
            # reconnect on exceptions
            print(err)
def main():
    """Stream sample tweets into ``FILENAME`` forever, reconnecting on errors.

    Each pass builds a new ``tweet_access.JsonStreamer`` and ``Stream``.
    When anything fails the exception is logged, the stream (if one was
    created) is disconnected, and the loop waits 60 seconds before retrying.
    """
    logger = support.getLogger('tsv_writer')
    while True:
        # Reset per attempt so the handler never touches an unbound or stale
        # stream when the failure happens before Stream() is built.
        stream = None
        try:
            data_streamer = tweet_access.JsonStreamer(filename=FILENAME)
            auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
            auth.set_access_token(ACCESS_TOKEN_KEY, ACCESS_TOKEN_SECRET)
            stream = Stream(auth, data_streamer)
            stream.sample(languages=LANGUAGES)
        except Exception as e:
            logger.exception(e)
            if stream is not None:
                stream.disconnect()
            time.sleep(60)
def main():
    """Connects to the stream and starts threads to write them to a file.

    Credentials come from the YAML configuration loaded via ``staticconf``.
    The English sample stream is restarted after TCP/HTTP errors with a
    delay that grows by 0.25 s per failure (the sleep is capped at
    ``MAX_TCPIP_TIMEOUT``).  On exit the stream is disconnected and the
    writer thread is drained and joined.
    """
    global tcpip_delay

    def note(message):
        # All status output of this function goes to stderr.
        print(message, file=sys.stderr)

    staticconf.YamlConfiguration(CONFIG_FILE)
    listener = QueueListener()

    auth = OAuthHandler(
        staticconf.read_string('twitter.consumer_key'),
        staticconf.read_string('twitter.consumer_secret'),
    )
    auth.set_access_token(
        staticconf.read_string('twitter.access_token'),
        staticconf.read_string('twitter.access_token_secret'),
    )

    writer_thread = threading.Thread(target=worker, args=(listener,))
    writer_thread.start()

    stream = Stream(auth, listener)
    print_status(listener)

    try:
        while True:
            try:
                stream.sample(languages=['en'])  # blocking!
            except KeyboardInterrupt:
                note('KEYBOARD INTERRUPT')
                return
            except (socket.error, httplib.HTTPException):
                note('TCP/IP Error: Restarting after {delay} seconds.'.format(
                    delay=tcpip_delay,
                ))
                time.sleep(min(tcpip_delay, MAX_TCPIP_TIMEOUT))
                tcpip_delay += 0.25
    finally:
        note('Disconnecting stream')
        stream.disconnect()
        note('Waiting for last tweets to finish processing')
        # Send poison pill to writer thread and wait for it to exit
        listener.queue.put(None)
        listener.queue.join()
        note('Waiting for writer thread to finish')
        writer_thread.join()
        note('Exit successful')
def stream_tweets():
    """Connect to the Twitter API and fetch relevant tweets from the stream.

    One ``TopicListener`` is shared across reconnects; every ``Exception``
    is printed and the English sample stream is started again.
    """
    topic_listener = TopicListener()
    sample_options = {'languages': ['en']}
    # start streaming
    while True:
        try:
            live_stream = Stream(auth_handler, topic_listener)
            print('Listening...')
            live_stream.sample(**sample_options)
        except Exception as failure:
            # reconnect on exceptions
            print(failure)
def get_recent_tweets(self, count=15):
    """
    Returns the tweets gathered from the sample stream

    :param count: passed as the first positional argument to
        ``stream.sample`` (presumably the number of tweets to gather -
        verify against ``TweepyStream``)
    :return: ``listener.results`` once the stream reports it is no longer
        running
    """
    import time  # local import: only needed for the polling pause below

    listener = StreamGatherer()
    stream = TweepyStream(self.auth, listener)
    stream.sample(count)
    while stream.running:
        # Wait until we are finished; sleep briefly instead of spinning in
        # a tight loop that pins a CPU core.
        time.sleep(0.05)
    return listener.results
class TwitterTrends(StreamListener):
    """Stream listener that forwards hashtagged tweets to subscribers."""

    def __init__(self):
        # initiate the required authentication
        auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
        auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
        self.stream = Stream(auth, self)
        self.subscribers = []

    def on_data(self, data):
        """Parse raw stream data and notify subscribers of tagged tweets.

        ``data`` is split on CRLF; each non-blank piece is decoded as JSON.
        Tweets carrying at least one hashtag are wrapped in a ``Tweet`` and
        passed to every subscriber's ``on_tweet``.  The stream is stopped
        when no subscribers remain.
        """
        for entry in data.split("\r\n"):
            # Splitting leaves empty pieces (e.g. after a trailing CRLF);
            # json.loads would raise on those.
            if not entry.strip():
                continue
            processed = json.loads(entry)
            # Only include tweets that has a "topic", in this case one or
            # more hashtags
            if "entities" in processed and processed['entities']['hashtags']:
                tags = [u"#" + t['text'].lower()
                        for t in processed['entities']['hashtags']]
                user = processed['user']['screen_name']
                text = processed['text']
                created_at = processed['created_at']
                tweet = Tweet(text, tags, created_at, user)
                # Iterate over a copy so a subscriber may unsubscribe from
                # inside its callback.
                for subscriber in list(self.subscribers):
                    subscriber.on_tweet(tweet)
        if not self.subscribers:
            self.stop()

    def on_error(self, error):
        """Report a stream error on stdout."""
        print(u"Error occurred: %s" % error)

    def add_subscriber(self, subscriber):
        """Register an object whose ``on_tweet`` receives matching tweets."""
        self.subscribers.append(subscriber)

    def remove_subscriber(self, subscriber):
        """Unregister a previously added subscriber."""
        self.subscribers.remove(subscriber)

    def start(self):
        """Start the sample stream."""
        self.stream.sample()

    def stop(self):
        """Print a notice and disconnect the stream."""
        print('Stopped')
        self.stream.disconnect()
def get_streaming_data(self):
    """Sample the stream once, then print and persist the gathered stats.

    Stream exceptions are reduced to a printout of the exception class
    docstring.  Language counts and each top tweet are inserted through the
    cursor ``self.c`` and committed on ``self.conn``.
    """
    listener_options = dict(num_tweets_to_grab=self.num_tweets_to_grab,
                            retweet_count=self.retweet_count,
                            stats=self.stats,
                            get_tweet_html=self.get_tweet_html)
    stream = Stream(self.auth, twitter_listener(**listener_options))
    try:
        stream.sample()
    except Exception as failure:
        print(failure.__doc__)

    lang, top_lang, top_tweets = self.stats.get_stats()
    print(Counter(lang))
    print(Counter(top_lang))
    print(len(top_tweets))

    lang_row = (str(list(Counter(lang).items())),
                str(list(Counter(top_lang).items())))
    self.c.execute("INSERT INTO lang_data VALUES (?,?, DATETIME('now'))",
                   lang_row)
    for tweet_html in top_tweets:
        self.c.execute("INSERT INTO twit_data VALUES (?, DATETIME('now'))",
                       (tweet_html,))
    self.conn.commit()
def fetch_from_twitter():
    """fetch tweets from twitter

    Runs the sample stream with a ``twitter.Listener`` sized by
    ``settings.MAX_TWEETS``, then saves every buffered tweet whose id is not
    stored yet.  Save errors are printed and do not stop the loop.
    """
    print("fetch_from_twitter => BEGIN")
    auth = twitter.get_auth()
    listener = twitter.Listener(settings.MAX_TWEETS)
    stream = Stream(auth, listener)
    stream.sample()
    print("fetch_from_twitter => SAMPLED")
    for data in listener.buffer:
        # Skip tweets that are already in the database.
        if Tweet.objects.filter(tweet_id=data['id']).count() != 0:
            continue
        tweet = Tweet(
            tweet_id=data['id'],
            text=data['text'],
            created_at=parser.parse(data['created_at']),
            username=data['user']['screen_name']
        )
        try:
            tweet.save()
        except Exception as e:
            # str(e) instead of the deprecated ``e.message`` attribute,
            # which does not exist on Python 3 exceptions.
            print("Error saving tweet " + str(tweet) + "(" + str(e) + ")")
def run_crawler(auth_file, dest_dir):
    """Run the sample-stream crawler forever.

    Args:
        auth_file: Path to a JSON file with ``consumer_key``,
            ``consumer_secret``, ``access_token`` and
            ``access_token_secret``.
        dest_dir: Base directory; ``log`` and ``text`` sub-paths are handed
            to ``TweetListener``.

    Any ``Exception`` raised by the stream is printed and the stream is
    started again.
    """
    log_dir = os.path.join(dest_dir, "log")
    text_dir = os.path.join(dest_dir, "text")
    listener = TweetListener(log_dir, text_dir)

    # Close the credentials file deterministically (it used to be leaked).
    with open(auth_file) as auth_fp:
        auth_info = json.load(auth_fp)
    consumer_key = auth_info["consumer_key"]
    consumer_secret = auth_info["consumer_secret"]
    access_token = auth_info["access_token"]
    access_token_secret = auth_info["access_token_secret"]

    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, listener)

    print("run at %s" % (datetime.utcnow()))
    while True:
        try:
            stream.sample()
        except Exception as ex:
            print(str(ex))
def main():
    """Stream sample tweets into CouchDB until the process is interrupted.

    Any ``Exception`` raised by the stream is printed and the stream is
    restarted.  The total tweet count is reported when the loop is left:
    the summary line used to sit after an endless loop and could never run.
    """
    auth = OAuthHandler(settings.consumer_key, settings.consumer_secret)
    auth.set_access_token(settings.access_token, settings.access_secret)
    server = couchdbkit.Server()
    db = server[settings.database]
    listener = CouchDBStreamListener(db)
    stream = Stream(auth, listener)
    try:
        while True:
            try:
                stream.sample()
            except Exception as e:
                print('error:  %s' % (e,))
    finally:
        # Runs when the loop is left (e.g. KeyboardInterrupt), which still
        # propagates to the caller afterwards.
        print("Total tweets received: %d" % listener.tweet_count)
def main():
    """Entry point: either convert stored raw tweets or stream new ones.

    With ``options.convert`` set, ``TweetHelper.copy_from_raw`` is run and
    nothing else happens.  Otherwise the Twitter credentials are read from
    the config file, a watch or ingest listener is chosen, and the stream is
    started - filtered by ``options.topics`` when given, else as a random
    sample.
    """
    options = parse_arguments()

    if options.convert:
        TweetHelper(options.url).copy_from_raw()
        return

    if not os.path.isfile(options.config):
        log("Config file not found: ", options.config)
        exit(1)

    config = RawConfigParser()
    config.read(options.config)

    # User credentials for the Twitter API, all from the "Twitter" section.
    section = "Twitter"
    access_token = config.get(section, "access_token")
    access_token_secret = config.get(section, "access_token_secret")
    consumer_key = config.get(section, "consumer_key")
    consumer_secret = config.get(section, "consumer_secret")

    # Determine which listener to use
    if options.watch:
        listener = WatchListener(options.max)
        log("Watching instead of storing tweets.")
    else:
        listener = IngestListener(options.max, options.url)

    # Twitter authentication and the connection to the Streaming API
    handler = OAuthHandler(consumer_key, consumer_secret)
    handler.set_access_token(access_token, access_token_secret)
    stream = Stream(handler, listener)

    if not options.topics:
        log("Reading a random sample of tweets.")
        stream.sample()
    else:
        log("Tracking tweets with these topics: ", options.topics)
        stream.filter(track=options.topics)
def sampleTwitter(keywords):
    """Stream tweets to a ``SampleStreamListener``, optionally by keyword.

    Credentials are read from the ``Twitter`` section of
    ``conf/config.file``.

    :param keywords: whitespace-separated terms; when empty the plain
        sample stream is used, otherwise the terms are passed to
        ``stream.filter`` as its second positional argument.
    """
    config = ConfigParser.ConfigParser()
    config.read('conf/config.file')

    section = 'Twitter'
    option_names = ('consumer_key', 'consumer_secret',
                    'token_key', 'token_secret')
    con_key, con_secret, token_key, token_secret = [
        config.get(section, option) for option in option_names
    ]

    sample_listener = SampleStreamListener()
    handler = OAuthHandler(con_key, con_secret)
    handler.set_access_token(token_key, token_secret)
    stream = Stream(handler, sample_listener)

    if len(keywords) != 0:
        stream.filter(None, [term.strip() for term in keywords.split()])
    else:
        stream.sample()
    print('DONE READING SAMPLE TWITTER')