def fromUser(self, screen_name, tweets_number=10, is_bot=False):
    """Store the most recent timeline statuses of ``screen_name`` as tweets.

    A user is obtained through ``self.createUser(screen_name, is_bot)``, the
    timeline is requested from ``self.twitter_client.user_timeline`` with
    ``count=tweets_number``, and ``Tweet.create`` is called once per status.

    Args:
        screen_name: Account whose timeline is fetched.
        tweets_number: Value passed as ``count`` to the timeline request.
        is_bot: Forwarded unchanged to ``self.createUser``.
    """
    def to_ascii(value):
        # NFKD-decompose, then drop every character that is not ASCII.
        return unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')

    user = self.createUser(screen_name, is_bot)
    timeline = self.twitter_client.user_timeline(
        screen_name=screen_name, count=tweets_number)

    for status in timeline:
        payload = status._json
        text = payload['text']
        date = payload['created_at']
        # Screen names of every mentioned account, in the order given.
        mentioned = [
            mention['screen_name']
            for mention in payload['entities']['user_mentions']
        ]
        Tweet.create(
            user=user,
            text=to_ascii(text),
            date=to_ascii(date),
            source=status.source,
            mentions=",".join(mentioned),
        )
def fromUser(self, screen_name, tweets_number=10, is_bot=False):
    """Fetch a user's timeline and record each status via ``Tweet.create``.

    Args:
        screen_name: Account name passed to ``self.createUser`` and to the
            timeline request.
        tweets_number: Passed as ``count`` to
            ``self.twitter_client.user_timeline``.
        is_bot: Second argument handed to ``self.createUser``.
    """
    owner = self.createUser(screen_name, is_bot)
    statuses = self.twitter_client.user_timeline(
        screen_name=screen_name,
        count=tweets_number,
    )

    for status in statuses:
        raw = status._json
        raw_text = raw['text']
        raw_date = raw['created_at']
        mention_names = [entry['screen_name']
                         for entry in raw['entities']['user_mentions']]

        # Both text fields are NFKD-normalised and stripped down to ASCII
        # before being stored.
        ascii_text = unicodedata.normalize('NFKD', raw_text)
        ascii_text = ascii_text.encode('ascii', 'ignore')
        ascii_date = unicodedata.normalize('NFKD', raw_date)
        ascii_date = ascii_date.encode('ascii', 'ignore')

        # Mentions are stored as one comma-separated string.
        joined_mentions = ",".join(mention_names)

        Tweet.create(user=owner,
                     text=ascii_text,
                     date=ascii_date,
                     source=status.source,
                     mentions=joined_mentions)
def index():
    """Render the tweet list on GET; store a submitted tweet on POST.

    On POST the ``text`` form field is read, a ``Tweet`` is created when the
    text is non-empty, and the client is redirected to ``/``.
    """
    method = request.method

    if method == 'POST':
        submitted = request.form['text']
        # Empty submissions are ignored but still redirected.
        if submitted:
            Tweet.create(text=submitted)
        return redirect('/')

    if method == 'GET':
        return render_template('index.html', tweets=Tweet.select())
    # Any other method falls through and returns None, as before.
def create(self, status):
    """Create a tweet from ``status`` and count it when creation succeeds.

    ``self.count`` is incremented only when ``Tweet().create(status)``
    returns exactly ``True``.
    """
    inserted = Tweet().create(status)
    # Identity check on purpose: other truthy return values are not counted.
    if inserted is True:
        self.count += 1
def get_timeline():
    """Fetch home-timeline tweets newer than the last stored one and save them.

    The stored tweet with the greatest ``created_at`` supplies ``since_id``
    for the request; when nothing is stored yet, no ``since_id`` is sent.
    Up to 150 tweets are requested and each is passed to ``Tweet.create``.

    Credentials are read from the environment variables ``TWITTER_API_KEY``,
    ``TWITTER_API_SECRET``, ``TWITTER_ACCESS_TOKEN`` and
    ``TWITTER_ACCESS_TOKEN_SECRET``. Unset or empty variables fall back to
    the values that were previously hard-coded here.
    """
    # Function-scope import so this block does not depend on the file header.
    import os

    # SECURITY: the fallback literals are real credentials committed to
    # source. They are kept only so existing deployments keep working;
    # rotate them and supply the new values through the environment.
    api_key = os.environ.get('TWITTER_API_KEY') or \
        '823HTWjuQJ9cL4uSX2ffR2sF5'
    api_secret = os.environ.get('TWITTER_API_SECRET') or \
        'PtKYljMNHjlTcKtf4j5WfLb6MJSDDh6VyTM1lMPKxfFhLoRgj7'
    access_token = os.environ.get('TWITTER_ACCESS_TOKEN') or \
        '2984322627-lSHzM70os2QNwucyKbQ3gqMZpSsPuo68wfdyb14'
    access_token_secret = os.environ.get('TWITTER_ACCESS_TOKEN_SECRET') or \
        'DC9sh5d1r7bD0WLnc2Pd0dRd167FucH6ZoDUUZcKlogqi'

    twitterAPIfactory = TwitterAPIFactory(
        consumer_key=api_key,
        consumer_secret=api_secret,
        access_token_key=access_token,
        access_token_secret=access_token_secret)
    api = twitterAPIfactory.getAPI()

    last_tweet_recorded = Tweet.query.order_by(
        Tweet.created_at.desc()).first()

    # Only restrict the request when there is a stored tweet to start from.
    kw = {}
    if last_tweet_recorded:
        kw['since_id'] = int(last_tweet_recorded.tweet_id)

    for tweet in api.GetHomeTimeline(count=150, **kw):
        Tweet.create(tweet)
def get_timeline():
    """Pull new home-timeline tweets and persist them with ``Tweet.create``.

    The newest stored tweet (ordered by ``created_at`` descending) provides
    the ``since_id`` lower bound; with an empty store the bound is omitted.
    The request asks for at most 150 tweets.

    Each credential may be overridden through an environment variable of the
    same name (``TWITTER_API_KEY``, ``TWITTER_API_SECRET``,
    ``TWITTER_ACCESS_TOKEN``, ``TWITTER_ACCESS_TOKEN_SECRET``). When a
    variable is unset or empty, the value formerly hard-coded here is used.
    """
    # Imported locally so the block stays self-contained.
    import os

    # SECURITY: these defaults are live secrets checked into source control.
    # They remain only for backward compatibility -- rotate them and
    # configure the replacements via the environment.
    defaults = {
        'TWITTER_API_KEY':
            '823HTWjuQJ9cL4uSX2ffR2sF5',
        'TWITTER_API_SECRET':
            'PtKYljMNHjlTcKtf4j5WfLb6MJSDDh6VyTM1lMPKxfFhLoRgj7',
        'TWITTER_ACCESS_TOKEN':
            '2984322627-lSHzM70os2QNwucyKbQ3gqMZpSsPuo68wfdyb14',
        'TWITTER_ACCESS_TOKEN_SECRET':
            'DC9sh5d1r7bD0WLnc2Pd0dRd167FucH6ZoDUUZcKlogqi',
    }
    credentials = dict(
        (name, os.environ.get(name) or fallback)
        for name, fallback in defaults.items()
    )

    twitterAPIfactory = TwitterAPIFactory(
        consumer_key=credentials['TWITTER_API_KEY'],
        consumer_secret=credentials['TWITTER_API_SECRET'],
        access_token_key=credentials['TWITTER_ACCESS_TOKEN'],
        access_token_secret=credentials['TWITTER_ACCESS_TOKEN_SECRET'])
    api = twitterAPIfactory.getAPI()

    last_tweet_recorded = Tweet.query.order_by(
        Tweet.created_at.desc()).first()

    kw = {}
    if last_tweet_recorded:
        # Ask only for tweets newer than the latest one already stored.
        kw['since_id'] = int(last_tweet_recorded.tweet_id)

    results = api.GetHomeTimeline(count=150, **kw)
    for tweet in results:
        Tweet.create(tweet)
def new():
    """Create a tweet for the posted ``username`` and ``message``.

    The user with the given username is looked up and created if it does not
    exist yet. A tweet carrying the message is then created for that user.

    Returns:
        ``str()`` of the created tweet.
    """
    username = request.form['username']
    message = request.form['message']

    # Only "no such user" triggers creation. The previous bare ``except:``
    # also swallowed unrelated failures (database errors, KeyboardInterrupt)
    # and answered them by creating a user.
    try:
        user = User.get(User.username == username)
    except User.DoesNotExist:
        user = User.create(username=username)

    # Create the tweet associated with the user.
    tweet = Tweet.create(user=user, message=message)
    return str(tweet)
def cronTopic():
    """Scrape Twitter search results for a hashtag topic and store them.

    Reads ``topic`` (required) and ``since_id`` (optional) from the form on
    POST or from the query string on GET. It searches for tweets with links,
    excluding retweets, tagged ``#<topic>`` and dated between 30 days and
    1 day ago. Result pages are followed via ``next_results`` and every
    status is passed to ``Tweet.create(topic, status)``. Paging stops when no
    ``next_results`` is returned or the running count reaches the cap (90
    when ``app.config['DEBUG']`` is true, 1222333 otherwise). A summary mail
    is sent at the end.

    The bearer token can be supplied through the ``TWITTER_BEARER_TOKEN``
    environment variable; when unset or empty, the previously hard-coded
    token is used.

    Returns:
        ``Response('OK')``.

    Aborts with 405 for methods other than GET/POST and with 400 when
    ``topic`` is missing or empty.
    """
    # Function-scope import so the block does not rely on the file header.
    import os

    # SECURITY: the fallback is a real token committed to source; rotate it
    # and configure the replacement via the environment.
    access_token = os.environ.get('TWITTER_BEARER_TOKEN') or 'AAAAAAAAAAAAAAAAAAAAABcJYAAAAAAAVviSzyKtPYqYlHpZxoim6DHvfjI%3DU0slNkvBKQRynT62gbvQjEhAlE2PvzVZNia99xAdoJweI2OLqe'
    search_url = 'https://api.twitter.com/1.1/search/tweets.json'

    def fetch_page(url):
        # One authenticated GET against the search endpoint.
        return urlfetch.fetch(
            url=url,
            method=urlfetch.GET,
            headers={
                'Authorization': 'Bearer {}'.format(access_token),
            },
        )

    if request.method == 'POST':
        app.logger.info('request form: {}'.format(request.form))
        submitted = request.form
    elif request.method == 'GET':
        app.logger.info('request args: {}'.format(request.args))
        submitted = request.args
    else:
        # Previously ``topic`` stayed unbound here and raised NameError.
        abort(405)

    topic = submitted.get('topic')
    if not topic:
        abort(400)

    # Read since_id from the same place as topic. It used to come from
    # request.form only, so it was silently ignored on GET requests.
    since_id = submitted.get('since_id')
    app.logger.info('Topic params received: {} {}'.format(topic, since_id))

    # Requests / 15-min window (user auth) 180
    # Requests / 15-min window (app auth) 450
    # 450 / (15 * 60) = 0.5 per second
    # thus 1 request every 2 seconds
    now = datetime.datetime.utcnow()
    month_ago = now - datetime.timedelta(days=30)
    day_ago = now - datetime.timedelta(days=1)

    query = {
        'q': 'filter:links since:{} until:{} #{} -filter:retweets'.format(
            month_ago.strftime('%Y-%m-%d'),
            day_ago.strftime('%Y-%m-%d'),
            topic,
        ),
        'result_type': 'recent',
        'include_entities': 1,
        'count': 100,
    }
    # urlencode would turn a missing since_id into the literal text "None",
    # so the parameter is only sent when a value was supplied.
    if since_id:
        query['since_id'] = since_id
    params = urllib.urlencode(query)  # count, until, since_id, max_id
    app.logger.info('params {}'.format(params))

    res = fetch_page('{}?{}'.format(search_url, params))
    app.logger.info(res)

    cnt = 0
    max_cnt = 90 if app.config['DEBUG'] else 1222333
    while cnt < max_cnt:
        content = json.loads(res.content)
        metadata = content['search_metadata']
        statuses = content['statuses']
        cnt += len(statuses)

        for status in statuses:
            app.logger.info('Processing status')
            Tweet.create(topic, status)

        if 'next_results' not in metadata:
            app.logger.info('No more statuses')
            break

        app.logger.info('Fetching more results at {}'.format(
            metadata['next_results']))
        # next_results already carries its leading "?" query string.
        # continue with nothing, quota will be obliterated with this
        res = fetch_page('{}{}'.format(search_url, metadata['next_results']))

    mail.send_mail(
        sender='*****@*****.**',
        to='*****@*****.**',
        subject='Cron topic {}'.format(topic),
        body='Scraped {} tweets for topic {}'.format(cnt, topic),
    )
    app.logger.info('Scraped {} tweets for topic {}'.format(cnt, topic))
    return Response('OK')
def cronTopic():
    """Collect recent tweets for a hashtag topic via the Twitter search API.

    ``topic`` (required) and ``since_id`` (optional) come from the form on
    POST and from the query string on GET. The search covers tweets with
    links, without retweets, tagged ``#<topic>``, dated from 30 days ago
    until 1 day ago. Pages are followed through ``next_results``; each status
    goes to ``Tweet.create(topic, status)``. The loop ends when there is no
    further page or when the number of statuses seen reaches the cap (90 if
    ``app.config['DEBUG']`` is true, else 1222333). A summary e-mail is sent
    afterwards.

    Set the ``TWITTER_BEARER_TOKEN`` environment variable to override the
    bearer token; otherwise the token that used to be hard-coded is used.

    Returns:
        ``Response('OK')``.

    Aborts with 405 on methods other than GET/POST, and with 400 when no
    ``topic`` was given.
    """
    # Local import keeps this block independent of the file's import section.
    import os

    # SECURITY: the default below is a live token stored in source control.
    # Rotate it and provide the new one through the environment.
    access_token = os.environ.get('TWITTER_BEARER_TOKEN') or 'AAAAAAAAAAAAAAAAAAAAABcJYAAAAAAAVviSzyKtPYqYlHpZxoim6DHvfjI%3DU0slNkvBKQRynT62gbvQjEhAlE2PvzVZNia99xAdoJweI2OLqe'
    endpoint = 'https://api.twitter.com/1.1/search/tweets.json'
    auth_headers = {
        'Authorization': 'Bearer {}'.format(access_token),
    }

    def search(url):
        # Single authenticated GET request to the given search URL.
        return urlfetch.fetch(
            url=url,
            method=urlfetch.GET,
            headers=auth_headers,
        )

    method = request.method
    if method == 'POST':
        app.logger.info('request form: {}'.format(request.form))
        source = request.form
    elif method == 'GET':
        app.logger.info('request args: {}'.format(request.args))
        source = request.args
    else:
        # Before this guard, any other method hit an unbound ``topic``.
        abort(405)

    topic = source.get('topic')
    if not topic:
        abort(400)

    # since_id is now taken from the same container as topic. Reading it
    # from request.form unconditionally dropped it on every GET request.
    since_id = source.get('since_id')
    app.logger.info('Topic params received: {} {}'.format(topic, since_id))

    # Requests / 15-min window (user auth) 180
    # Requests / 15-min window (app auth) 450
    # 450 / (15 * 60) = 0.5 per second
    # thus 1 request every 2 seconds
    today = datetime.datetime.utcnow()
    month_ago = today - datetime.timedelta(days=30)
    day_ago = today - datetime.timedelta(days=1)

    search_args = {
        'q': 'filter:links since:{} until:{} #{} -filter:retweets'.format(
            month_ago.strftime('%Y-%m-%d'),
            day_ago.strftime('%Y-%m-%d'),
            topic,
        ),
        'result_type': 'recent',
        'include_entities': 1,
        'count': 100,
    }
    if since_id:
        # Sent only when present: urlencode renders None as the text "None".
        search_args['since_id'] = since_id
    params = urllib.urlencode(search_args)  # count, until, since_id, max_id
    app.logger.info('params {}'.format(params))

    res = search('{}?{}'.format(endpoint, params))
    app.logger.info(res)

    cnt = 0
    max_cnt = 90 if app.config['DEBUG'] else 1222333
    while cnt < max_cnt:
        content = json.loads(res.content)
        metadata = content['search_metadata']
        statuses = content['statuses']
        cnt += len(statuses)

        for status in statuses:
            app.logger.info('Processing status')
            Tweet.create(topic, status)

        next_results = metadata.get('next_results') \
            if 'next_results' in metadata else None
        if 'next_results' not in metadata:
            app.logger.info('No more statuses')
            break

        app.logger.info('Fetching more results at {}'.format(next_results))
        # next_results is appended as-is; it includes its own "?" prefix.
        # continue with nothing, quota will be obliterated with this
        res = search('{}{}'.format(endpoint, next_results))

    mail.send_mail(
        sender='*****@*****.**',
        to='*****@*****.**',
        subject='Cron topic {}'.format(topic),
        body='Scraped {} tweets for topic {}'.format(cnt, topic),
    )
    app.logger.info('Scraped {} tweets for topic {}'.format(cnt, topic))
    return Response('OK')
if __name__ == '__main__':
    # Connect to Cassandra and register the session that Tweet.create uses.
    cluster = Cluster(CASSANDRA_ADDRESS)
    session = cluster.connect(CASSANDRA_KEYSPACE)
    session.row_factory = dict_factory
    set_session(session)

    # Re-open the stream whenever it ends; keywords.txt is re-read each time
    # so edits to the file take effect on the next pass.
    while True:
        # Read the keywords up front so the file is closed before streaming.
        with open('keywords.txt', 'r') as keywords:
            track = keywords.read().splitlines()

        stream = api.GetStreamFilter(track=track)
        for tweet in stream:
            # A missing/empty 'geo' or 'user' yields None for its fields.
            # Previously the user fields were left unbound (NameError on the
            # first message) or kept the values of the preceding tweet.
            geo_dict = tweet.get('geo') or {}
            user = tweet.get('user') or {}

            Tweet.create(
                id=tweet.get('id'),
                text=tweet.get('text'),
                user_id=user.get('id'),
                user_name=user.get('name'),
                user_screen_name=user.get('screen_name'),
                # str() is applied as before, so a missing type is stored
                # as the text 'None'.
                geo_type=str(geo_dict.get('type')),
                geo_coordinates=geo_dict.get('coordinates'),
                created_at=str(tweet.get('created_at', '')),
                raw_tweet=json.dumps(tweet),
            )
# Open the Cassandra session and register it for the Tweet model.
session = cluster.connect(CASSANDRA_KEYSPACE)
session.row_factory = dict_factory
set_session(session)

# Stream forever: when the stream ends, keywords.txt is read again and a new
# filtered stream is opened.
while True:
    # Load the track list first so the file handle is released right away.
    with open('keywords.txt', 'r') as keywords:
        track_terms = keywords.read().splitlines()

    stream = api.GetStreamFilter(track=track_terms)
    for tweet in stream:
        geo_dict = tweet.get('geo')
        if geo_dict:
            geo_type = geo_dict.get('type')
            geo_coordinates = geo_dict.get('coordinates')
        else:
            geo_type = None
            geo_coordinates = None

        # Reset the user fields for every message. They used to be assigned
        # only when 'user' was present, which left them unbound on the first
        # message or carrying the previous tweet's values.
        user_id = None
        user_name = None
        user_screen_name = None
        user = tweet.get('user')
        if user:
            user_id = user.get('id')
            user_name = user.get('name')
            user_screen_name = user.get('screen_name')

        Tweet.create(
            id=tweet.get('id'),
            text=tweet.get('text'),
            user_id=user_id,
            user_name=user_name,
            user_screen_name=user_screen_name,
            geo_type=str(geo_type),
            geo_coordinates=geo_coordinates,
            created_at=str(tweet.get('created_at', '')),
            raw_tweet=json.dumps(tweet)
        )