def get_tweets():
    # Query parameters; defaults (page 1, 20 items) avoid int(None)
    # raising a TypeError when an argument is missing.
    page = request.args.get('page', 1, type=int)
    limit = request.args.get('limit', 20, type=int)
    tweets_models = (Tweet.select()
                     .order_by(Tweet.date.desc())
                     .paginate(page, limit))
    tweets = [tweet.to_dict()['tweet_data'] for tweet in tweets_models]
    body = {
        'tweets': tweets,
    }
    return Response(json.dumps(body), mimetype='application/json')
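# A minimal wiring sketch for the endpoint above, assuming a plain Flask app
# and a peewee Tweet model defined elsewhere in the project; the '/tweets'
# route path is an assumption, not taken from the original source.
import json

from flask import Flask, Response, request

app = Flask(__name__)
app.add_url_rule('/tweets', view_func=get_tweets)

# Example request: GET /tweets?page=1&limit=20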
def check_tweets(self):
    while True:
        for user in self.settings.users:
            try:
                tweets = self.api.user_timeline(
                    id=user, count=200, exclude_replies=True,
                    include_rts=False, tweet_mode='extended')
                tweets_saved = 0
                for tweet in tweets:
                    try:
                        # force_insert makes peewee INSERT instead of UPDATE,
                        # so already-stored tweets raise IntegrityError and
                        # are skipped.
                        tweets_saved += Tweet.from_tweepy(tweet).save(
                            force_insert=True)
                    except IntegrityError:
                        pass
                if tweets_saved > 0:
                    logger.debug('[check_tweets] Got %d new tweets from %s',
                                 tweets_saved, user)
                    self.reload_brain()
            except tweepy.TweepError as exc:
                logger.warning(
                    '[check_tweets] Error while getting tweets of %s: %s',
                    user, exc)
            except Exception as exc:
                logger.error(
                    '[check_tweets] Error while getting tweets of %s:', user)
                logger.exception(exc)
        gevent.sleep(10)
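# A minimal sketch (assumed, not the project's actual model) of the peewee
# Tweet model that the force_insert/IntegrityError deduplication above relies
# on: the tweet id is the primary key, so re-inserting an existing row fails.
from peewee import (BigIntegerField, CharField, DateTimeField, Model,
                    SqliteDatabase, TextField)

db = SqliteDatabase('tweets.db')  # hypothetical database path

class Tweet(Model):
    id = BigIntegerField(primary_key=True)
    user = CharField()
    text = TextField()
    date = DateTimeField()

    class Meta:
        database = db

    @classmethod
    def from_tweepy(cls, status):
        # Map a tweepy Status to a model instance; field mapping assumed,
        # full_text matches the tweet_mode='extended' fetch above.
        return cls(id=status.id, user=status.user.screen_name,
                   text=status.full_text, date=status.created_at)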
def on_data(self, data):
    data = json.loads(data)
    if self.should_reject(data):
        return True

    retweet_data = data['retweeted_status']
    annotated_data = cprop.annotate_data([retweet_data['text']],
                                         self.model, self.vectorizer)
    category = pydash.get(annotated_data, 'top_cat.0')
    category_id = pydash.get(annotated_data, 'top_cat_id.0')
    score = pydash.get(annotated_data, 'scores.0')
    if self.should_reject_by_topic(score, category_id):
        return True

    tweet_info = {
        'text': retweet_data['text'],
        'date': retweet_data['created_at'],
        'tweet_data': retweet_data,
        'category': category,
        'category_score': score,
        'model_data': pydash.get(annotated_data, 'labels'),
    }
    # Merge in the raw tweet fields; on key collisions the raw fields win.
    tweet_info = {**tweet_info, **retweet_data}

    print(tweet_info['category'], tweet_info['text'])
    print('https://twitter.com/i/web/status/' + tweet_info['id_str'])
    print()

    try:
        tweet_model = Tweet.from_dict(tweet_info)
        res = tweet_model.save(force_insert=True)
        print('rows affected:', res)
    except Exception as e:
        print(e)
        print('rolling back')
        # Roll back on the model's database connection; model instances
        # themselves have no rollback() method.
        Tweet._meta.database.rollback()
    return True
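# A hedged wiring sketch for the listener above, assuming tweepy 3.x (where
# on_data-based listeners and tweepy.TweepError are current). The listener
# class name, its constructor arguments, and the follow id are hypothetical.
listener = RetweetListener(api, model, vectorizer)
stream = tweepy.Stream(auth=api.auth, listener=listener)
stream.filter(follow=['12345'], is_async=True)  # placeholder user id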
def store_tweet(self, tweet):
    new_tweet = Tweet()
    # Copy the tweet dict onto the model column by column, flattening the
    # list-valued entity fields into lists of their relevant properties.
    for column in Tweet.__table__.columns:
        if column.name == 'hashtags':
            obj_properties = [obj.get('text') for obj in tweet.get(column.name)]
            setattr(new_tweet, column.name, obj_properties)
        elif column.name == 'user_id':
            setattr(new_tweet, column.name, tweet['user']['id'])
        elif column.name == 'urls':
            obj_properties = [obj.get('url') for obj in tweet.get(column.name)]
            setattr(new_tweet, column.name, obj_properties)
        elif column.name == 'user_mentions':
            obj_properties = [obj.get('id') for obj in tweet.get(column.name)]
            setattr(new_tweet, column.name, obj_properties)
        else:
            try:
                setattr(new_tweet, column.name, tweet.get(column.name))
            except AttributeError:
                pass

    # Route the row into the update or insert batch, deduplicating by id.
    if self.sql_session.query(
            exists().where(Tweet.id == new_tweet.id)).scalar():
        if new_tweet.id not in self.tweet_update_ids:
            self.tweet_updates.append(new_tweet.__dict__)
            self.tweet_update_ids.append(new_tweet.id)
    else:
        if new_tweet.id not in self.insert_ids:
            self.inserts.append(new_tweet)
            self.insert_ids.append(new_tweet.id)
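# A hedged sketch of how the accumulated batches might be flushed, assuming
# self.inserts holds Tweet instances and self.tweet_updates holds attribute
# dicts as built above; the method name flush_tweets is hypothetical.
def flush_tweets(self):
    if self.inserts:
        self.sql_session.bulk_save_objects(self.inserts)
    if self.tweet_updates:
        # __dict__ of an SQLAlchemy instance carries _sa_instance_state;
        # strip it before handing the mappings to bulk_update_mappings.
        mappings = [{k: v for k, v in d.items() if k != '_sa_instance_state'}
                    for d in self.tweet_updates]
        self.sql_session.bulk_update_mappings(Tweet, mappings)
    self.sql_session.commit()
    self.inserts, self.insert_ids = [], []
    self.tweet_updates, self.tweet_update_ids = [], []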
def from_db(limit):
    brain = Brain()
    tweets = Tweet.select().order_by(Tweet.tweet_id.desc()).limit(limit)
    tweets_used = 0
    for tweet in tweets:
        if brain.add_tweet(tweet.text):
            tweets_used += 1
    logger.debug('[load_from_db] limit=%d loaded=%d used=%d chain=%d',
                 limit, len(tweets), tweets_used, len(brain.chain))
    return brain
fig = plt.figure(figsize=(10, len(accounts) * 4))
plots = len(accounts)
for i, (acc, val) in enumerate(accounts.items()):
    ax1 = fig.add_subplot(plots, 1, i + 1)
    ax1.set_xlabel("Weeks")
    ax1.set_ylabel("Tweets/week")
    # Weekly tweet counts per account; pad skipped weeks with zeros.
    data = []
    prev_week = 1
    week_expr = fn.strftime('%W', Tweet.date)
    query = (Tweet.select(week_expr.alias('week'),
                          fn.Count(Tweet.id.distinct()))
             .where(Tweet.user == acc)
             .group_by(week_expr)
             .tuples())
    for week, count in query:
        week = int(week)
        # Throwaway loop name so the subplot index i is not clobbered.
        for _ in range(1, week - prev_week):
            data.append(0)
        data.append(count)
        prev_week = week
    # 2018 has 53 weeks
    for _ in range(0, 53 - len(data)):
        data.append(0)
    # normalization to the interval [0, 1.0]
    #norm = Normalize()
    #data = norm(data)
plt.tight_layout()
if tweet['coordinates'] is None:
    lon = None
    lat = None
else:
    lon = tweet['coordinates']['coordinates'][0][0]
    lat = tweet['coordinates']['coordinates'][1][0]
ts = time.strftime('%Y-%m-%d %H:%M:%S',
                   time.strptime(tweet['created_at'][0],
                                 '%a %b %d %H:%M:%S +0000 %Y'))
t = Tweet(id=int(tweet['id_str'][0]),
          is_quote_status=tweet['is_quote_status'][0],
          in_reply_to_status_id=tweet['in_reply_to_status_id'],
          in_reply_to_user_id=tweet['in_reply_to_user_id'],
          quoted_status_id=qsid,
          source=tweet['source'][0],
          longitude=lon,
          latitude=lat,
          lang=tweet['lang'][0],
          created_at=ts,
          retweet_count=tweet['retweet_count'][0],
          favorite_count=tweet['favorite_count'][0],
          text=tweet['text'],
          user_id=tweet['user']['id'][0],
          group='de_panel')
session.add(t)

# Commit in chunks so a long run does not hold one giant transaction.
nchunk = 10000
if tweet_count % nchunk == 0:
    print("Processed {} tweets".format(tweet_count))
    print("Committing {} to database...".format(nchunk))
    session.commit()
while start < end:
    next_date = start + step
    for acc, name in accounts.items():
        #print("ACC:", acc)
        values['q'] = "from:{} since:{} until:{}".format(
            acc, start.strftime('%Y-%m-%d'), next_date.strftime('%Y-%m-%d'))
        url = REQ + "?" + urllib.parse.urlencode(values)
        req = urllib.request.Request(url, headers=headers)
        response = urllib.request.urlopen(req)
        page = response.read()
        try:
            json_data = json.loads(page.decode('utf-8'))
            sel = Selector(json_data['items_html'])
            tweets = sel.xpath(
                "//div[contains(@class,'tweet')]/@data-tweet-id").extract()
            for tweet in tweets:
                t, created = Tweet.get_or_create(id=tweet, user=acc,
                                                 date=start)
                if created:
                    print("NEW TWEET:", tweet)
            if json_data['has_more_items']:
                print("HAS MORE:", start)
        except Exception as exc:
            print("ERROR:", start, exc)
    start = next_date
print("ROUND END:", i)
# Compose tweet
tweet_content = compose_tweet(tweet_data)

# Post tweet
try:
    response = api.update_status(tweet_content)
except tweepy.error.TweepError as e:
    print(e)
    response = False

# Write data to database
if response:
    tweet_id = response.id_str
    tweet = Tweet(
        tweet_id=tweet_id,
        content=tweet_content,
        tweet_data=tweet_data
    )
    spy_price_history = PriceHistory(
        asset_id=spy.id,
        price=current_spy_in_dollars,
        price_sats=spy_in_sats,
    )
    btc_price_history = PriceHistory(
        asset_id=btc.id,
        price=current_btc_in_dollars,
        price_sats=sats_in_btc
    )
    session.add(tweet)
    session.add(spy_price_history)
    session.add(btc_price_history)
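# The snippet above only stages the rows; a hedged sketch of the commit step
# that presumably follows, using standard SQLAlchemy session semantics.
from sqlalchemy.exc import SQLAlchemyError

try:
    session.commit()
except SQLAlchemyError as e:
    # Undo the staged inserts if the commit fails.
    session.rollback()
    print(e)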