def main():
    """Stream tweets for the selected city until the user interrupts.

    Parses the command-line arguments, writes the authenticated account's
    name and id to stderr, then repeatedly opens a Streaming API connection
    filtered by ``city[args.city]``.  Stream failures are logged to stderr
    and the connection is re-opened after a 10 second pause; Ctrl-C ends the
    loop.

    Exits via ``sys.exit`` when ``args.city`` is not a key of ``city``.
    """
    # Validate the command line before touching the network, so a bad
    # invocation fails immediately and without an API round trip.
    arg_parser = get_parser()
    args = arg_parser.parse_args()

    # Resolve the location filter once, outside the retry loop.  Inside the
    # loop a bad key would be caught by ``except Exception`` and retried
    # forever.
    try:
        locations = city[args.city]
    except KeyError:
        sys.exit('[Error!] unknown city {!r}'.format(args.city))

    api = get_twitter_api()
    user = api.me()
    sys.stderr.write("-- Name: %s -- ID: %s \n" % (user.name, user.id))

    auth = get_twitter_auth()
    sys.stderr.write("Start scraping tweets:...\n")

    while True:
        try:
            # A fresh listener (and database manager) is built for every
            # connection attempt, exactly as before.
            twitter_stream = Stream(
                auth,
                MyListener(api, args.lang, DataBaseManager(args.database)),
            )
            twitter_stream.filter(locations=locations)
        except IncompleteRead as e:
            sys.stderr.write('[Error!] on IncompleteRead {}\n'.format(e))
            time.sleep(10)
        except KeyboardInterrupt as ex:
            # The user asked to stop: leave the reconnect loop.
            sys.stderr.write('You cancel the program! {}\n'.format(ex))
            break
        except Exception as ept:
            # Top-level boundary: log whatever went wrong and reconnect.
            sys.stderr.write('[Error!] on Exception {}\n'.format(ept))
            time.sleep(10)
class CustomListener(StreamListener):
    """Custom StreamListener for streaming Twitter data."""

    def __init__(self, fname):
        """Derive the output file name from *fname*.

        Args:
            fname: free-form text (the joined query terms) that is sanitised
                with ``format_filename`` and embedded in the file name.
        """
        safe_fname = format_filename(fname)
        self.outfile = "stream_%s.jsonl" % safe_fname

    def on_data(self, data):
        """Append the raw payload *data* to the output file.

        Always returns True, so a failed write never stops the stream; the
        failure is reported on stderr and followed by a 5 second pause.
        """
        try:
            # Open in append mode: the file is created on first use.
            with open(self.outfile, 'a') as f:
                f.write(data)
        except Exception as e:
            # ``Exception`` rather than ``BaseException`` so that
            # KeyboardInterrupt / SystemExit are not swallowed here.
            sys.stderr.write("Error on_data: {}\n".format(e))
            time.sleep(5)
        return True

    def on_error(self, status):
        """Report a stream error; return False only for status 420."""
        if status == 420:
            sys.stderr.write("Rate limit exceeded\n")
            return False
        sys.stderr.write("Error {}\n".format(status))
        return True


# Characters kept verbatim in generated file names.  Built once here instead
# of once per character inside ``convert_valid``.
_VALID_FILENAME_CHARS = "-_.%s%s" % (string.ascii_letters, string.digits)


def format_filename(fname):
    """Convert fname into a safe string for a file name.

    Return: string
    """
    return ''.join(convert_valid(one_char) for one_char in fname)


def convert_valid(one_char):
    """Convert a character into '_' if "invalid".

    A character is valid when it is an ASCII letter, a digit, or one of
    ``-``, ``_`` and ``.``.

    Return: string
    """
    return one_char if one_char in _VALID_FILENAME_CHARS else '_'


if __name__ == '__main__':
    query = sys.argv[1:]  # list of CLI arguments
    query_fname = ' '.join(query)  # string
    auth = get_twitter_auth()
    twitter_stream = Stream(auth, CustomListener(query_fname))
    # ``async`` is a reserved keyword from Python 3.7 on, so ``async=True``
    # no longer parses; tweepy 3.7+ names this parameter ``is_async``.
    twitter_stream.filter(track=query, is_async=True)
def observe_tweets(observable):
    """Stream tweets and forward each raw payload to *observable*.

    Opens a stream filtered by the module-level ``topics`` and ``locations``.
    Each payload is parsed as JSON and passed to ``insert_tweet``, then the
    raw payload is pushed to ``observable.on_next``.

    Args:
        observable: object exposing ``on_next(data)`` and ``on_error(status)``.
    """

    class TweetListener(StreamListener):
        """Listener that stores each tweet and notifies the observable."""

        def on_data(self, data):
            """Store the parsed tweet, then publish the raw payload."""
            insert_tweet(json.loads(data))
            observable.on_next(data)
            return True

        def on_error(self, status):
            """Report a stream error; return False only for status 420."""
            if status == 420:
                # Only this status is propagated to the observable.
                observable.on_error(status)
                sys.stderr.write("Rate limit exceeded\n")
                return False
            sys.stderr.write("Error {}\n".format(status))
            return True

    tweet_listener = TweetListener()
    auth = get_twitter_auth()
    stream = Stream(auth, tweet_listener)
    stream.filter(track=topics, locations=locations)
sys.stderr.write("Rate limit exceeded\n".format(status)) return False else: sys.stderr.write("Error {}\n".format(status)) return True def format_filename(fname): """Convert fname into a safe string for a file name. Return: string """ return ''.join(convert_valid(one_char) for one_char in fname) def convert_valid(one_char): """Convert a character into '_' if "invalid". Return: string """ valid_chars = "-_.%s%s" % (string.ascii_letters, string.digits) if one_char in valid_chars: return one_char else: return '_' if __name__ == '__main__': query = sys.argv[1:] # list of CLI arguments query_fname = ' '.join(query) # string auth = get_twitter_auth() twitter_stream = Stream(auth, CustomListener(query_fname)) twitter_stream.filter(track=query, is_async=True)
def get_wordcloud():
    """Build a word cloud from the 30 most common entries of ``counter``.

    Returns:
        The object returned by ``WordCloud.generate_from_frequencies``.
    """
    words = dict(counter.most_common(30))
    # NOTE(review): wordcloud's documentation says ``stopwords`` is ignored
    # by ``generate_from_frequencies`` -- confirm whether filtering is meant
    # to happen where ``counter`` is filled.
    wordcloud = WordCloud(
        background_color='white',
        font_path=font_path,
        mask=image_mask,
        stopwords=stopwords + punctuation,
    ).generate_from_frequencies(words)
    return wordcloud


def display_wordcloud():
    """Draw the current word cloud and keep it on screen for 10 seconds.

    Returns:
        Always True.
    """
    wordcloud = get_wordcloud()
    plt.imshow(wordcloud.recolor(color_func=image_colors),
               interpolation='bilinear')
    plt.axis('off')
    plt.pause(10.0)
    return True


if __name__ == '__main__':
    import time  # local import: only the idle wait below needs it

    parser = get_parser()
    args = parser.parse_args()
    query = [args.query]
    auth = twitter_client.get_twitter_auth()
    stream = Stream(auth, MyListener(query))
    # ``async`` is a reserved keyword from Python 3.7 on, so ``async=True``
    # no longer parses; tweepy 3.7+ names this parameter ``is_async``.
    stream.filter(track=query, is_async=True)
    while True:
        if counter:
            display_wordcloud()
        else:
            # Nothing counted yet: wait briefly instead of busy-spinning.
            time.sleep(0.5)
return False else: sys.stderr.write("Error {}\n".format(status)) return True def format_filename(fname): """Convert fname into a safe string for a file name. Return: string """ return ''.join(convert_valid(one_char) for one_char in fname) def convert_valid(one_char): """Convert a character into '_' if "invalid". Return: string """ valid_chars = "-_.%s%s" % (string.ascii_letters, string.digits) if one_char in valid_chars: return one_char else: return '_' if __name__ == '__main__': query = sys.argv[1:] # list of CLI arguments query_fname = ' '.join(query) # string auth = get_twitter_auth() twitter_stream = Stream(auth, CustomListener(query_fname)) twitter_stream.filter(track=query, async=True)