def create_tweet_helper(tweet_data, user):
    """Build a ``Tweet`` object from a raw tweet payload.

    Args:
        tweet_data: Mapping with the keys ``text``, ``coordinates``,
            ``id_str``, ``created_at`` and ``favorite_count``.
        user: Value handed through unchanged as the ``user`` field.

    Returns:
        The newly constructed ``Tweet``; this function does not save it.
    """
    # A tweet is flagged as a retweet when its text opens with "RT ".
    starts_with_rt = tweet_data['text'].startswith('RT ')
    # Coordinates are always serialized, so a ``None`` value becomes 'null'.
    serialized_coords = json.dumps(tweet_data['coordinates'])

    fields = {
        'tid': tweet_data['id_str'],
        'tweet': tweet_data['text'],
        'user': user,
        'coordinates': serialized_coords,
        'created_at': tweet_data['created_at'],
        'favorite_count': tweet_data['favorite_count'],
        'is_retweet': starts_with_rt,
    }
    return Tweet(**fields)
def get_tweets(SEARCH):
    """Search for tweets newer than the latest stored one for a term.

    Looks up the stored ``Tweet`` row with the highest ``id`` whose
    ``searchterm`` equals ``SEARCH`` and uses that id as ``since_id`` for
    the search request; ``until`` is taken from the module-level ``TODAY``.

    Args:
        SEARCH: The search term, used both for the lookup and as ``q``.

    Returns:
        Whatever ``api.GetSearch`` returns, or an empty list when the
        search call raises.
    """
    # NOTE(review): ``.get()`` presumably raises when no row matches the
    # search term (peewee-style query) -- confirm; that case is not handled.
    newest = (
        Tweet.select(Tweet.id)
        .where(Tweet.searchterm == SEARCH)
        .order_by(Tweet.id.desc())
        .get()
    )
    params = {"q": SEARCH, "until": TODAY, "since_id": newest.id}

    try:
        return api.GetSearch(raw_query=parse.urlencode(params))
    except Exception:
        # Previously a bare ``except`` fell through to ``return results``
        # with ``results`` unbound, raising UnboundLocalError. Report the
        # failure and hand back an empty result instead.
        print("error, no results")
        return []
def readCSVToTweets(path):
    """Load tweets from a CSV file and split them by class.

    The file is parsed with ``;`` as delimiter and ``|`` as quote
    character. Each row must provide the columns ``id``, ``tokens``,
    ``original``, ``classe`` and ``emojis``, which are passed positionally
    to ``Tweet`` in that order.

    Args:
        path: Path of the CSV file to read.

    Returns:
        A ``(positives, negatives)`` tuple of lists. A tweet lands in the
        first list when its ``classe`` equals ``POSITIVE``; every other
        tweet lands in the second.
    """
    positives, negatives = [], []
    with open(path, 'r') as handle:
        rows = csv.DictReader(handle, delimiter=';', quotechar='|',
                              quoting=csv.QUOTE_MINIMAL)
        for record in rows:
            item = Tweet(record['id'], record['tokens'], record['original'],
                         record['classe'], record['emojis'])
            # Anything that is not POSITIVE is treated as negative.
            bucket = positives if item.classe == POSITIVE else negatives
            bucket.append(item)
    return positives, negatives
def create_tweet_helper(tweet_data, user):
    """Build a ``Tweet`` object from a raw tweet payload.

    Args:
        tweet_data: Mapping holding the tweet fields. ``lang`` and
            ``quoted_status_id`` are optional (missing keys yield ``None``);
            every other field read below must be present.
        user: Value handed through unchanged as the ``user`` field.

    Returns:
        The newly constructed ``Tweet``; this function does not save it.
    """
    body = tweet_data['text']
    # A tweet is flagged as a retweet when its text opens with "RT ".
    starts_with_rt = body.startswith('RT ')
    # Coordinates are always serialized, so a ``None`` value becomes 'null'.
    serialized_coords = json.dumps(tweet_data['coordinates'])

    return Tweet(
        tid=tweet_data['id_str'],
        tweet=tweet_data['text'],
        user=user,
        coordinates=serialized_coords,
        created_at=tweet_data['created_at'],
        favorite_count=tweet_data['favorite_count'],
        in_reply_to_screen_name=tweet_data['in_reply_to_screen_name'],
        in_reply_to_status_id=tweet_data['in_reply_to_status_id'],
        in_reply_to_user_id=tweet_data['in_reply_to_user_id'],
        lang=tweet_data.get('lang'),
        quoted_status_id=tweet_data.get('quoted_status_id'),
        retweet_count=tweet_data['retweet_count'],
        source=tweet_data['source'],
        is_retweet=starts_with_rt,
    )
def main():
    """Work through the job queue once, restoring removed tweets.

    For every ``Job`` row:

    * jobs whose tweet was created less than five minutes ago are left in
      the queue for a later run;
    * if the same user has a later, non-deleted and non-withheld tweet with
      nearly identical text and identical media, the job is dropped;
    * otherwise the tweet is re-posted through the bot (with its media, if
      any) and the job is removed.

    The database connection is closed even when an unexpected exception
    escapes the loop.
    """
    logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO)
    db.connect()
    try:
        bot = init_detwtr_bot()
        min_age = datetime.timedelta(minutes=5)

        for job in Job.select():
            logging.info("Processing job: {id}".format(id=job.id))
            if (datetime.datetime.now() - job.tweet.created_at) < min_age:
                logging.info("Tweet is not old enough, wait a few more minutes")
                continue

            # Tolerated edit distance: 14 per 140 characters of the original
            # text, but never less than 3.
            max_distance = max(3, int(math.ceil(14 / 140 * len(job.tweet.text))))

            is_duplicate = False
            later_tweets = Tweet.select().where(
                (Tweet.user == job.tweet.user)
                & (Tweet.created_at > job.tweet.created_at)
                & ~(Tweet.is_deleted)
                & ~(Tweet.is_withheld))
            for tweet in later_tweets:
                levdist = editdistance.eval(tweet.text, job.tweet.text)
                if levdist <= max_distance and job.tweet.media == tweet.media:
                    is_duplicate = True
                    logging.info("Duplicate found:\n{tweet_1}\n---\n{tweet_2}".format(
                        tweet_1=job.tweet.text, tweet_2=tweet.text))
                    break

            if is_duplicate:
                logging.info("Tweet is very similar to other tweets, won't restore")
                job.delete_instance()
                continue

            logging.info("Found no similar tweets, going to restore! :3")
            # "@" is replaced by "&" in the re-posted text.
            text = job.tweet.text.replace("@", "&")
            try:
                if job.tweet.media:
                    media = io.BytesIO(job.tweet.media)
                    resp = bot.upload_media(media=media)
                    bot.update_status(status=text, media_ids=[resp["media_id"]])
                else:
                    bot.update_status(status=text)
                logging.info("Tweet restored, all is well...")
                job.delete_instance()
            except TwythonError as e:
                logging.error("TwythonError: {error}".format(error=repr(e)))
                # A duplicate-status rejection means retrying is pointless,
                # so the job is dropped; other errors keep it queued.
                if "Status is a duplicate" in e.msg:
                    job.delete_instance()
    finally:
        # Previously skipped whenever an exception escaped the loop.
        db.close()
def create_tweet_helper(tweet_data, user):
    """Build a ``Tweet`` object from a raw tweet payload.

    Args:
        tweet_data: Mapping holding the tweet fields. ``lang`` and
            ``quoted_status_id`` are optional (missing keys yield ``None``);
            every other field read below must be present.
        user: Value handed through unchanged as the ``user`` field.

    Returns:
        The newly constructed ``Tweet``; this function does not save it.
    """
    # A tweet is flagged as a retweet when its text opens with "RT ".
    starts_with_rt = tweet_data["text"].startswith("RT ")
    # Coordinates are always serialized, so a ``None`` value becomes "null".
    serialized_coords = json.dumps(tweet_data["coordinates"])

    fields = {
        "tid": tweet_data["id_str"],
        "tweet": tweet_data["text"],
        "user": user,
        "coordinates": serialized_coords,
        "created_at": tweet_data["created_at"],
        "favorite_count": tweet_data["favorite_count"],
        "in_reply_to_screen_name": tweet_data["in_reply_to_screen_name"],
        "in_reply_to_status_id": tweet_data["in_reply_to_status_id"],
        "in_reply_to_user_id": tweet_data["in_reply_to_user_id"],
        "lang": tweet_data.get("lang"),
        "quoted_status_id": tweet_data.get("quoted_status_id"),
        "retweet_count": tweet_data["retweet_count"],
        "source": tweet_data["source"],
        "is_retweet": starts_with_rt,
    }
    return Tweet(**fields)
def create_tweet_helper(tweet_data, user):
    """Build a ``Tweet`` object from a raw tweet payload.

    Args:
        tweet_data: Mapping holding the tweet fields. The reply fields,
            ``lang`` and ``quoted_status_id`` are optional (missing keys
            yield ``None``); every other field read below must be present.
        user: Value handed through unchanged as the ``user`` field.

    Returns:
        The newly constructed ``Tweet``; this function does not save it.
    """
    # A tweet is flagged as a retweet when its text opens with "RT ".
    starts_with_rt = tweet_data['text'].startswith('RT ')

    # Only serialize coordinates that exist; a missing location stays None.
    raw_coords = tweet_data['coordinates']
    stored_coords = None if raw_coords is None else json.dumps(raw_coords)

    # The timestamp string is parsed with the module-level _TIME_FORMAT.
    posted_at = datetime.strptime(tweet_data['created_at'], _TIME_FORMAT)

    return Tweet(
        tid=tweet_data['id_str'],
        tweet=tweet_data['text'],
        user=user,
        coordinates=stored_coords,
        created_at=posted_at,
        favorite_count=tweet_data['favorite_count'],
        in_reply_to_screen_name=tweet_data.get('in_reply_to_screen_name'),
        in_reply_to_status_id=tweet_data.get('in_reply_to_status_id'),
        in_reply_to_user_id=tweet_data.get('in_reply_to_user_id'),
        lang=tweet_data.get('lang'),
        quoted_status_id=tweet_data.get('quoted_status_id'),
        retweet_count=tweet_data['retweet_count'],
        source=tweet_data['source'],
        is_retweet=starts_with_rt,
    )
def on_success(self, data):
    """Handle one decoded stream message.

    Three kinds of message are recognised by the keys present in ``data``;
    a message matching none of them is ignored:

    * ``"text"`` -- a tweet. Tweets from the bot itself and retweets are
      skipped; anything else is stored (text, id, user and, when present,
      the bytes of the first attached media item).
    * ``"delete"`` -- a deletion notice. An ``Event`` is recorded and, if the
      tweet is stored, it is flagged deleted and queued as a ``Job``.
    * ``"status_withheld"`` -- a withheld notice, handled like a deletion
      but flagging the tweet as withheld.

    Args:
        data: The decoded message as a mapping.
    """
    if "text" in data:
        payload = {}

        # skip if tweet is from bot itself
        if data["user"]["id_str"] == settings.BOT_ID:
            return

        # skip tweet if it's just a RT
        if "retweeted_status" in data:
            return

        payload["text"] = data["text"]
        payload["tweet_id"] = data["id_str"]
        payload["user"] = User.get_or_create(user_id=data["user"]["id_str"])[0]

        if "media" in data["entities"]:
            # Only the first media item is kept; its short URL is stripped
            # from the stored text.
            first_media = data["entities"]["media"][0]
            media_url = first_media["media_url"]
            url_in_tweet = first_media["url"]
            payload["text"] = payload["text"].replace(url_in_tweet, "")
            r = requests.get(media_url)
            payload["media"] = r.content

        # unescape HTML entities
        payload["text"] = html.unescape(payload["text"])

        # storing tweet in database
        logging.info("Adding new tweet to DB: {id} from {user}".format(
            id=payload["tweet_id"], user=payload["user"].user_id))
        tweet_db = Tweet(**payload)
        try:
            tweet_db.save()
        except IntegrityError:
            logging.error("Tweet already present in DB")

    if "delete" in data:
        deleted_status = data["delete"]["status"]
        logging.info("Received delete message, checking if corresponding tweet is stored: {id}".format(
            id=deleted_status["id_str"]))

        instance = None
        try:
            instance = Tweet.get(Tweet.tweet_id == deleted_status["id_str"])
            logging.info("Tweet found! :)")
        except DoesNotExist:
            logging.info("Tweet not found! :(")

        # The event is recorded even when the tweet itself is not stored.
        event_db = Event(event="delete",
                         user=User.get_or_create(user_id=deleted_status["user_id_str"])[0],
                         tweet=instance)
        event_db.save()

        if instance:
            # mark this tweet as deleted
            instance.is_deleted = True
            instance.save()

            # add tweet to job queue
            jobs_db = Job(tweet=instance)
            try:
                jobs_db.save()
            except IntegrityError:
                logging.error("Tweet is already marked for restoration")

    if "status_withheld" in data:
        withheld = data["status_withheld"]
        # Ids are converted to strings before lookup and storage.
        withheld_id = str(withheld["id"])
        logging.info("Received withheld content notice, checking if corresponding tweet is stored: {id}".format(
            id=withheld_id))

        instance = None
        try:
            instance = Tweet.get(Tweet.tweet_id == withheld_id)
            logging.info("Tweet found! :)")
        except DoesNotExist:
            logging.info("Tweet not found! :(")

        # The event is recorded even when the tweet itself is not stored.
        event_db = Event(event="withheld",
                         user=User.get_or_create(user_id=str(withheld["user_id"]))[0],
                         tweet=instance)
        event_db.save()

        if instance:
            # mark this tweet as withheld
            instance.is_withheld = True
            instance.save()

            # add tweet to job queue
            jobs_db = Job(tweet=instance)
            try:
                jobs_db.save()
            except IntegrityError:
                logging.error("Tweet is already marked for restoration")
def save_to_database(tweet, keyword):
    """Store one tweet payload, tagged with the keyword that found it.

    Args:
        tweet: Mapping holding the tweet fields. Must contain ``created_at``,
            ``id_str``, ``text``, ``retweet_count``, ``favorite_count``,
            ``geo``, ``entities`` (with ``hashtags`` and ``user_mentions``)
            and ``user`` (with ``screen_name``, ``name`` and ``verified``).
            ``possibly_sensitive`` and ``entities['symbols']`` are optional.
        keyword: Stored unchanged as ``tweet_keyword``.

    Raises:
        Any exception raised while adding or committing is re-raised after
        the session has been rolled back and closed.
    """
    entities = tweet['entities']
    author = tweet['user']

    record = Tweet()
    record.tweet_created_at = convert(tweet['created_at'])
    record.tweet_keyword = keyword
    record.tweet_id = tweet['id_str']
    record.tweet_text = tweet['text']
    record.tweet_retweet_count = tweet['retweet_count']
    record.tweet_favorite_count = tweet['favorite_count']
    record.tweet_hashtags_used = [tag['text'] for tag in entities['hashtags']]
    # The original loop iterated the empty result list itself, so symbols
    # were never collected. Read them from the payload; the key is treated
    # as optional because earlier versions never required it.
    symbols = entities['symbols'] if 'symbols' in entities else []
    record.tweet_symbols_used = [symbol['text'] for symbol in symbols]
    record.tweet_users_mentioned = [
        mention['screen_name'] for mention in entities['user_mentions']
    ]
    record.tweet_user_screen_name = author['screen_name']
    record.tweet_user_name = author['name']
    record.tweet_user_verified = author['verified']
    # The geo value is stored in its string form (``None`` becomes 'None').
    record.tweet_location = str(tweet['geo'])
    record.tweet_possibly_sensitive = (
        tweet['possibly_sensitive'] if 'possibly_sensitive' in tweet else False
    )

    # initiate session with db
    Session = sessionmaker(bind=database.engine)
    session = Session()
    try:
        session.add(record)
        session.commit()
    except Exception:
        # Discard the failed transaction before propagating the error.
        session.rollback()
        raise
    finally:
        # Always release the session, even when the commit fails.
        session.close()