예제 #1
0
def create_tweet_helper(tweet_data, user):
    """Build a ``Tweet`` from a raw tweet payload and the user it belongs to.

    Args:
        tweet_data: Mapping holding the tweet fields read below
            (``text``, ``coordinates``, ``id_str``, ``created_at``,
            ``favorite_count``).
        user: Value passed through unchanged as the tweet's ``user``.

    Returns:
        The ``Tweet`` constructed from those fields.
    """
    text = tweet_data['text']
    # A tweet whose text begins with "RT " is treated as a retweet.
    starts_as_retweet = text[:3] == 'RT '
    # Coordinates are stored as their JSON serialisation.
    encoded_coordinates = json.dumps(tweet_data['coordinates'])
    return Tweet(
        tid=tweet_data['id_str'],
        tweet=tweet_data['text'],
        user=user,
        coordinates=encoded_coordinates,
        created_at=tweet_data['created_at'],
        favorite_count=tweet_data['favorite_count'],
        is_retweet=starts_as_retweet,
    )
예제 #2
0
def get_tweets(SEARCH):
    """Search for tweets matching ``SEARCH`` that are newer than the stored ones.

    The highest stored ``Tweet.id`` for this search term is used as the
    ``since_id`` of the query, and the module-level ``TODAY`` as ``until``.

    Args:
        SEARCH: The search term; also the value matched against
            ``Tweet.searchterm``.

    Returns:
        Whatever ``api.GetSearch`` returns, or an empty list when the search
        call fails.

    Note:
        The lookup of the latest stored tweet is not guarded; if ``.get()``
        raises (presumably when no tweet is stored for the term yet --
        TODO confirm), the exception propagates to the caller.
    """
    latest = Tweet.select(Tweet.id).where(Tweet.searchterm == SEARCH).order_by(
        Tweet.id.desc()).get()
    params = {"q": SEARCH, "until": TODAY, "since_id": latest.id}
    try:
        return api.GetSearch(raw_query=parse.urlencode(params))
    except Exception:
        # Previously a bare ``except`` fell through to ``return results`` with
        # ``results`` unbound, turning any API error into UnboundLocalError.
        print("error, no results")
        return []
예제 #3
0
def readCSVToTweets(path):
    """Load tweets from a CSV file and split them by class.

    The file is read with ``;`` as delimiter and ``|`` as quote character.
    Each row must provide the columns ``id``, ``tokens``, ``original``,
    ``classe`` and ``emojis``.

    Args:
        path: Path of the CSV file to read.

    Returns:
        A ``(positives, negatives)`` tuple of lists. A tweet goes into the
        first list when its ``classe`` equals ``POSITIVE`` and into the
        second list otherwise.
    """
    positives, negatives = [], []
    with open(path, 'r') as csvfile:
        rows = csv.DictReader(
            csvfile,
            delimiter=';',
            quotechar='|',
            quoting=csv.QUOTE_MINIMAL,
        )
        for row in rows:
            tweet = Tweet(row['id'], row['tokens'], row['original'],
                          row['classe'], row['emojis'])
            bucket = positives if tweet.classe == POSITIVE else negatives
            bucket.append(tweet)
    return positives, negatives
예제 #4
0
def create_tweet_helper(tweet_data, user):
    """Build a ``Tweet`` from a raw tweet payload and the user it belongs to.

    Args:
        tweet_data: Mapping with the tweet fields. ``lang`` and
            ``quoted_status_id`` are optional (read with ``.get``); every
            other key read below must be present.
        user: Value passed through unchanged as the tweet's ``user``.

    Returns:
        The ``Tweet`` constructed from those fields.
    """
    # A tweet whose text begins with "RT " is treated as a retweet.
    is_retweet = tweet_data['text'][:3] == 'RT '
    # Coordinates are stored as their JSON serialisation.
    coordinates_json = json.dumps(tweet_data['coordinates'])

    fields = {
        'tid': tweet_data['id_str'],
        'tweet': tweet_data['text'],
        'user': user,
        'coordinates': coordinates_json,
        'created_at': tweet_data['created_at'],
        'favorite_count': tweet_data['favorite_count'],
        'in_reply_to_screen_name': tweet_data['in_reply_to_screen_name'],
        'in_reply_to_status_id': tweet_data['in_reply_to_status_id'],
        'in_reply_to_user_id': tweet_data['in_reply_to_user_id'],
        'lang': tweet_data.get('lang'),
        'quoted_status_id': tweet_data.get('quoted_status_id'),
        'retweet_count': tweet_data['retweet_count'],
        'source': tweet_data['source'],
        'is_retweet': is_retweet,
    }
    return Tweet(**fields)
예제 #5
0
파일: restore.py 프로젝트: buckket/detwtr
def main():
    """Process every queued restoration job once.

    For each ``Job``:

    * jobs whose tweet was created less than five minutes ago are left in
      the queue for a later run;
    * if the same user has a later tweet (not deleted, not withheld) with the
      same media and a small edit distance to the job's tweet, the job is
      dropped as a duplicate;
    * otherwise the tweet is re-posted through the bot (with ``@`` replaced
      by ``&``) and the job is removed on success.

    The database connection is closed when the function exits, including
    when an unexpected exception propagates.
    """
    logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO)
    db.connect()
    # try/finally so the connection is released even if something other than
    # the TwythonError handled below is raised.
    try:
        bot = init_detwtr_bot()
        for job in Job.select():
            logging.info("Processing job: {id}".format(id=job.id))

            if (datetime.datetime.now() - job.tweet.created_at) < datetime.timedelta(minutes=5):
                logging.info("Tweet is not old enough, wait a few more minutes")
                continue

            is_duplicate = False
            for tweet in Tweet.select().where((Tweet.user == job.tweet.user) & (
                        Tweet.created_at > job.tweet.created_at) & ~(Tweet.is_deleted) & ~(Tweet.is_withheld)):
                levdist = editdistance.eval(tweet.text, job.tweet.text)
                # Allowed distance is 10% of the text length (14 per 140
                # characters), rounded up, but never less than 3.
                if levdist <= max(3, int(math.ceil(14 / 140 * len(job.tweet.text)))) and job.tweet.media == tweet.media:
                    is_duplicate = True
                    logging.info("Duplicate found:\n{tweet_1}\n---\n{tweet_2}".format(tweet_1=job.tweet.text,
                                                                                      tweet_2=tweet.text))
                    break

            if is_duplicate:
                logging.info("Tweet is very similar to other tweets, won't restore")
                job.delete_instance()
            else:
                logging.info("Found no similar tweets, going to restore! :3")
                text = job.tweet.text.replace("@", "&")
                try:
                    if job.tweet.media:
                        media = io.BytesIO(job.tweet.media)
                        resp = bot.upload_media(media=media)
                        bot.update_status(status=text, media_ids=[resp["media_id"]])
                    else:
                        bot.update_status(status=text)
                    logging.info("Tweet restored, all is well...")
                    job.delete_instance()
                except TwythonError as e:
                    logging.error("TwythonError: {error}".format(error=repr(e)))
                    # The status was already posted, so there is nothing left
                    # to retry; other errors keep the job for the next run.
                    if "Status is a duplicate" in e.msg:
                        job.delete_instance()
    finally:
        db.close()
예제 #6
0
def create_tweet_helper(tweet_data, user):
    """Turn a raw tweet payload into a ``Tweet`` owned by ``user``.

    Args:
        tweet_data: Mapping with the tweet fields. ``lang`` and
            ``quoted_status_id`` are optional (read with ``.get``); every
            other key read below must be present.
        user: Value passed through unchanged as the tweet's ``user``.

    Returns:
        The ``Tweet`` constructed from those fields.
    """
    body = tweet_data["text"]
    # Text starting with "RT " marks the tweet as a retweet.
    looks_like_retweet = body[:3] == "RT "
    # Coordinates are kept as a JSON string.
    serialized_coordinates = json.dumps(tweet_data["coordinates"])

    return Tweet(
        tid=tweet_data["id_str"],
        tweet=tweet_data["text"],
        user=user,
        coordinates=serialized_coordinates,
        created_at=tweet_data["created_at"],
        favorite_count=tweet_data["favorite_count"],
        in_reply_to_screen_name=tweet_data["in_reply_to_screen_name"],
        in_reply_to_status_id=tweet_data["in_reply_to_status_id"],
        in_reply_to_user_id=tweet_data["in_reply_to_user_id"],
        lang=tweet_data.get("lang"),
        quoted_status_id=tweet_data.get("quoted_status_id"),
        retweet_count=tweet_data["retweet_count"],
        source=tweet_data["source"],
        is_retweet=looks_like_retweet,
    )
예제 #7
0
def create_tweet_helper(tweet_data, user):
    """Turn a raw tweet payload into a ``Tweet`` owned by ``user``.

    Unlike a plain field copy, ``created_at`` is parsed with
    ``_TIME_FORMAT`` and ``coordinates`` stays ``None`` when the payload has
    no coordinates (otherwise it is stored as a JSON string).

    Args:
        tweet_data: Mapping with the tweet fields. The reply fields,
            ``lang`` and ``quoted_status_id`` are optional (read with
            ``.get``); every other key read below must be present.
        user: Value passed through unchanged as the tweet's ``user``.

    Returns:
        The ``Tweet`` constructed from those fields.
    """
    # Text starting with 'RT ' marks the tweet as a retweet.
    is_retweet = tweet_data['text'][:3] == 'RT '

    raw_coordinates = tweet_data['coordinates']
    coordinates = None if raw_coordinates is None else json.dumps(raw_coordinates)

    return Tweet(
        tid=tweet_data['id_str'],
        tweet=tweet_data['text'],
        user=user,
        coordinates=coordinates,
        created_at=datetime.strptime(tweet_data['created_at'], _TIME_FORMAT),
        favorite_count=tweet_data['favorite_count'],
        in_reply_to_screen_name=tweet_data.get('in_reply_to_screen_name'),
        in_reply_to_status_id=tweet_data.get('in_reply_to_status_id'),
        in_reply_to_user_id=tweet_data.get('in_reply_to_user_id'),
        lang=tweet_data.get('lang'),
        quoted_status_id=tweet_data.get('quoted_status_id'),
        retweet_count=tweet_data['retweet_count'],
        source=tweet_data['source'],
        is_retweet=is_retweet,
    )
예제 #8
0
파일: detwtr.py 프로젝트: buckket/detwtr
    def on_success(self, data):
        """Handle one received message, dispatching on the keys it contains.

        * ``"text"``: a new tweet. It is stored in the database unless it
          comes from the bot itself or is a retweet.
        * ``"delete"``: a deletion notice. An ``Event`` is recorded and, if
          the tweet is stored, it is flagged as deleted and queued as a
          ``Job``.
        * ``"status_withheld"``: a withheld-content notice. Handled like a
          deletion, but the tweet is flagged as withheld instead.

        The three checks are independent ``if`` statements, so a message
        containing several of these keys is processed by each matching
        branch.

        Args:
            data: The decoded message as a dict.
        """
        if "text" in data:
            payload = {}

            # skip if tweet is from bot itself
            if data["user"]["id_str"] == settings.BOT_ID:
                return

            # skip tweet if it's just a RT
            if "retweeted_status" in data:
                return

            payload["text"] = data["text"]
            payload["tweet_id"] = data["id_str"]
            # get_or_create returns a tuple; index 0 is the User instance
            payload["user"] = User.get_or_create(user_id=data["user"]["id_str"])[0]

            if "media" in data["entities"]:
                # only the first media entry is downloaded and stored
                media_url = data["entities"]["media"][0]["media_url"]
                url_in_tweet = data["entities"]["media"][0]["url"]
                # strip the media link from the text that gets stored
                payload["text"] = payload["text"].replace(url_in_tweet, "")
                # NOTE(review): no timeout is passed, so this request can block
                # indefinitely; the response status is not checked either.
                r = requests.get(media_url)
                payload["media"] = r.content

            # unescape HTML entities
            payload["text"] = html.unescape(payload["text"])

            # storing tweet in database
            logging.info("Adding new tweet to DB: {id} from {user}".format(id=payload["tweet_id"],
                                                                           user=payload["user"].user_id))
            tweet_db = Tweet(**payload)
            try:
                tweet_db.save()
            except IntegrityError:
                logging.error("Tweet already present in DB")

        if "delete" in data:
            logging.info("Received delete message, checking if corresponding tweet is stored: {id}".format(
                id=data["delete"]["status"]["id_str"]))

            # stays None when the deleted tweet was never stored
            instance = None
            try:
                instance = Tweet.get(Tweet.tweet_id == data["delete"]["status"]["id_str"])
                logging.info("Tweet found! :)")
            except DoesNotExist:
                logging.info("Tweet not found! :(")

            # the event is recorded even when the tweet itself is unknown
            event_db = Event(event="delete",
                             user=User.get_or_create(user_id=data["delete"]["status"]["user_id_str"])[0],
                             tweet=instance)
            event_db.save()

            if instance:
                # mark this tweet as deleted
                instance.is_deleted = True
                instance.save()

                # add tweet to job queue
                jobs_db = Job(tweet=instance)
                try:
                    jobs_db.save()
                except IntegrityError:
                    logging.error("Tweet is already marked for restoration")

        if "status_withheld" in data:
            # ids in this notice are converted with str() before being used
            # for lookups
            logging.info("Received withheld content notice, checking if corresponding tweet is stored: {id}".format(
                id=str(data["status_withheld"]["id"])))

            # stays None when the withheld tweet was never stored
            instance = None
            try:
                instance = Tweet.get(Tweet.tweet_id == str(data["status_withheld"]["id"]))
                logging.info("Tweet found! :)")
            except DoesNotExist:
                logging.info("Tweet not found! :(")

            # the event is recorded even when the tweet itself is unknown
            event_db = Event(event="withheld",
                             user=User.get_or_create(user_id=str(data["status_withheld"]["user_id"]))[0],
                             tweet=instance)
            event_db.save()

            if instance:
                # mark this tweet as withheld
                instance.is_withheld = True
                instance.save()

                # add tweet to job queue
                jobs_db = Job(tweet=instance)
                try:
                    jobs_db.save()
                except IntegrityError:
                    logging.error("Tweet is already marked for restoration")
예제 #9
0
def save_to_database(tweet, keyword):
    """Persist one tweet payload, tagged with the keyword that found it.

    Args:
        tweet: Mapping with the tweet fields read below. ``entities`` must
            contain ``hashtags`` and ``user_mentions``; ``symbols`` inside
            ``entities`` and the top-level ``possibly_sensitive`` are
            optional.
        keyword: Stored as ``tweet_keyword`` on the saved row.

    Raises:
        KeyError: If a required field is missing from ``tweet``.
        Exception: Any error raised while committing is re-raised after the
            session has been rolled back.
    """
    entities = tweet['entities']

    record = Tweet()
    record.tweet_created_at = convert(tweet['created_at'])
    record.tweet_keyword = keyword
    record.tweet_id = tweet['id_str']
    record.tweet_text = tweet['text']
    record.tweet_retweet_count = tweet['retweet_count']
    record.tweet_favorite_count = tweet['favorite_count']
    record.tweet_hashtags_used = [tag['text'] for tag in entities['hashtags']]
    # The original loop iterated over the (empty) result list itself, so
    # symbols were never collected. Read them from the payload; ``.get``
    # keeps payloads without a ``symbols`` entry working as before.
    record.tweet_symbols_used = [
        symbol['text'] for symbol in entities.get('symbols', [])
    ]
    record.tweet_users_mentioned = [
        mention['screen_name'] for mention in entities['user_mentions']
    ]
    record.tweet_user_screen_name = tweet['user']['screen_name']
    record.tweet_user_name = tweet['user']['name']
    record.tweet_user_verified = tweet['user']['verified']
    record.tweet_location = str(tweet['geo'])
    record.tweet_possibly_sensitive = tweet.get('possibly_sensitive', False)

    # Open the session only once the row is fully built, and always release
    # it, so a malformed payload or a failed commit cannot leak a session.
    Session = sessionmaker(bind=database.engine)
    session = Session()
    try:
        session.add(record)
        session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()