def main(argv):
    """Program entry point: validate CLI options, configure Twitter OAuth,
    prepare the temp (and optional local) output directories, then start
    streaming tweets that match the configured keywords.

    Parameters:
        argv: raw command-line arguments (i.e. ``sys.argv[1:]``); they are
            validated into an ``Option`` object by ``Option.validate``.
    """
    opt = Option.validate(argv)
    analyzer = Analyzer()
    listener = Listener(analyzer, opt.hdfs_path, opt.local_path,
                        opt.roll_size)

    # OAuth credentials come straight from the validated options.
    auth = OAuthHandler(opt.consumer_key, opt.consumer_secret)
    auth.set_access_token(opt.access_token_key, opt.access_token_secret)

    # Ensure the scratch directories (and, when configured, the local
    # archive directories) exist for both output kinds.
    for sub in (Util.TWEETS, Util.WORDCLOUD):
        _ensure_dir(os.path.join(Util.TMP_DIR, sub))
        if opt.local_path:
            _ensure_dir(os.path.join(opt.local_path, sub))

    # Blocks: streams tweets matching the keywords into the listener.
    stream = Stream(auth, listener)
    stream.filter(track=opt.keywords)


def _ensure_dir(path):
    """Create *path* (including parents) if it does not already exist."""
    if not os.path.exists(path):
        os.makedirs(path)
hdfs_path=None, local_path=None):
    # NOTE(review): the 'def' line of this function (its name and the
    # leading parameters such as tmp_tweet_dir / tmp_wordcloud_dir) lies
    # before this chunk; only the tail of the signature is visible here.
    #
    # Flush every buffered tweet CSV: upload to HDFS and/or copy it into
    # the local archive, then delete the temp file.
    tweet_files = os.listdir(tmp_tweet_dir)
    for tf in tweet_files:
        if hdfs_path:
            # tf[-24:-4] keeps a fixed-width 20-char slice of the temp
            # file name (presumably a timestamp) and drops the old
            # extension — TODO confirm the temp-file naming convention.
            # NOTE(review): no '/' is inserted between hdfs_path and
            # Util.TWEETS, unlike main() which joins with '/'; this
            # assumes hdfs_path already ends with '/' — confirm.
            hadoopy.put(tmp_tweet_dir + '/' + tf,
                        hdfs_path + Util.TWEETS + '/' + tf[-24:-4] + '.csv')
        if local_path:
            # NOTE(review): same missing-'/' assumption applies to
            # local_path here — confirm it ends with '/'.
            shutil.copy(tmp_tweet_dir + '/' + tf,
                        local_path + Util.TWEETS + '/' + tf[-24:-4] + '.csv')
        os.remove(tmp_tweet_dir + '/' + tf)
    # Same flush for the buffered word-cloud CSVs.
    wordcloud_files = os.listdir(tmp_wordcloud_dir)
    for wf in wordcloud_files:
        if hdfs_path:
            hadoopy.put(tmp_wordcloud_dir + '/' + wf,
                        hdfs_path + Util.WORDCLOUD + '/' + wf[-24:-4] + '.csv')
        if local_path:
            shutil.copy(
                tmp_wordcloud_dir + '/' + wf,
                local_path + Util.WORDCLOUD + '/' + wf[-24:-4] + '.csv')
        os.remove(tmp_wordcloud_dir + '/' + wf)


# our program's main entry point
if __name__ == '__main__':
    opt = Option.validate(sys.argv[1:])
    # NOTE(review): main() itself calls Option.validate(argv), so passing
    # the already-validated Option here feeds an Option object into
    # validate() a second time; the commented-out call below
    # (main(sys.argv[1:])) looks like the intended invocation — confirm
    # and fix the call site.
    main(opt)
    # main(sys.argv[1:])