Beispiel #1
0
def insert_retweet_data(retweet_data):
    """Insert a retweet together with the original tweet it refers to.

    The original tweet (``retweet_data['retweet_data']``) is handed to
    ``insert_tweet_data`` first and then looked up again by its tweet id so
    that the retweet row can reference it through ``retweet_source_id``.

    Arguments:
    retweet_data -- dict describing the retweet. Keys read here: 'url',
    'tweet_created', 'user_name', 'tweet_id' and 'retweet_data' (a nested
    dict that must contain 'tweet_id').

    Errors raised by the commit are logged and swallowed; the function
    always returns None.
    """
    original_tweet = retweet_data['retweet_data']
    insert_tweet_data(original_tweet)
    with GetDBSession() as db_session:
        retweet_id = int(original_tweet['tweet_id'])
        retweetSource = db_session.query(twitter_models.TwitterSource).filter(
            twitter_models.TwitterSource.tweet_id == retweet_id).first()
        if retweetSource is None:
            # Without the original tweet there is nothing to link the
            # retweet to; bail out instead of failing on ``None.id`` below.
            eleanor_logger.critical(
                ('Original tweet %s was not found in the database, '
                 'skipping retweet %s'),
                retweet_id, retweet_data['tweet_id'])
            return

        # The retweet row carries an empty text; the written text lives on
        # the original tweet's text source.
        # NOTE(review): 'tweet_created' is passed through unparsed here,
        # while insert_non_retweet_data runs it through date_parse -- confirm
        # which form insert_text_data expects.
        tweetTextModel = insert_text_data(models.AllowedSources.twitter.name,
                                          retweet_data['url'], '',
                                          retweet_data['tweet_created'],
                                          db_session)

        tweetModel = twitter_models.TwitterSource(
            retweet_source_id=retweetSource.id,
            tweeter_user_name=retweet_data['user_name'],
            tweet_id=retweet_data['tweet_id'],
            is_retweet=True)
        tweetTextModel.twitter_source = tweetModel
        try:
            db_session.commit()
        except IntegrityError as e:
            # str(e) instead of e.message: exceptions have no ``message``
            # attribute on Python 3, so the old check raised AttributeError
            # from inside this handler.
            if 'duplicate key value' in str(e):
                # We've already captured this so, moving on
                eleanor_logger.info(
                    'Duplicate tweet is already in the database, skipping')
            else:
                eleanor_logger.critical(
                    ('A database error occurred while attempting to '
                     'insert tweet %s'), e)
        except Exception as e:
            # Something real bad happened
            eleanor_logger.critical(
                ('An error has occurred while inserting a tweet into '
                 'the database %s'), e)
Beispiel #2
0
def insert_non_retweet_data(tweet_data):
    """Insert a regular (non-retweet) tweet into the database.

    Arguments:
    tweet_data -- dict describing the tweet. Keys read here: 'url',
    'tweet_text', 'tweet_created', 'user_name' and 'tweet_id'. The same dict
    is also handed to add_user_mentions, add_hashtags and add_urls.

    Errors raised by the commit are logged and swallowed; the function
    always returns None.
    """
    eleanor_logger.debug('Inserting tweet data')
    with GetDBSession() as db_session:
        tweetTextModel = insert_text_data(
            models.AllowedSources.twitter.name,
            tweet_data['url'], tweet_data['tweet_text'],
            date_parse(tweet_data['tweet_created']), db_session)

        tweetModel = twitter_models.TwitterSource(
            tweeter_user_name=tweet_data['user_name'],
            tweet_id=tweet_data['tweet_id'],
            is_retweet=False)
        tweetTextModel.twitter_source = tweetModel

        add_user_mentions(tweet_data, tweetModel)
        add_hashtags(tweet_data, tweetModel)
        add_urls(tweet_data, tweetModel)

        try:
            db_session.commit()
        except IntegrityError as e:
            # str(e) instead of e.message: exceptions have no ``message``
            # attribute on Python 3, so the old check raised AttributeError
            # from inside this handler.
            if 'duplicate key value' in str(e):
                # We've already captured this so, moving on
                eleanor_logger.info(
                    'Duplicate tweet is already in the database, skipping')
            else:
                eleanor_logger.critical(
                    ('A database error occurred while attempting '
                     'to insert tweet %s'), e)
        except Exception as e:
            # Something real bad happened
            eleanor_logger.critical(
                ('An error has occurred while inserting a tweet into '
                 'the database %s'), e)
Beispiel #3
0
def _twitter_source_as_dict(source):
    """Flatten a TwitterSource row and its related rows into a plain dict."""
    return {
        'user_name': source.tweeter_user_name,
        'tweet_id': source.tweet_id,
        'url': source.text_source.source_url,
        'tweet_text': source.text_source.written_text,
        'tweet_created':
        get_string_from_datetime(source.text_source.time_posted),
        'is_retweet': source.is_retweet,
        'user_mentions': [mention.user_name for mention in source.mentions],
        'hashtags': [hashtag.hashtag for hashtag in source.hashtags],
        'tweet_urls': [url.url for url in source.urls],
    }


def get_tweet_data_by_id(tweet_id):
    """Return the stored data for tweet_id, or None if it is not stored.

    Arguments:
    tweet_id -- id of the tweet to look up.

    Returns a dict with the keys 'user_name', 'tweet_id', 'url',
    'tweet_text', 'tweet_created', 'is_retweet', 'user_mentions', 'hashtags',
    'tweet_urls' and 'retweet_data'. For a retweet, 'retweet_data' holds the
    same set of keys (minus 'retweet_data') for the original tweet; otherwise
    it is an empty dict.
    """
    with GetDBSession() as db_session:
        twitterSource = db_session.query(twitter_models.TwitterSource).filter(
            twitter_models.TwitterSource.tweet_id == tweet_id).first()
        if twitterSource is None:
            return None

        retweet_data = {}
        if twitterSource.is_retweet:
            # Query the TwitterSource model; the previous code passed the
            # twitter_models module itself to query().
            retweetSource = db_session.query(
                twitter_models.TwitterSource).get(
                    twitterSource.retweet_source_id)
            # Leave retweet_data empty if the referenced row is missing.
            if retweetSource is not None:
                retweet_data = _twitter_source_as_dict(retweetSource)

        # Build the result while the session is still open, since the
        # related attributes are read from the row objects here.
        ret_data = _twitter_source_as_dict(twitterSource)
        ret_data['retweet_data'] = retweet_data
    return ret_data
Beispiel #4
0
def get_tracked_twitter_tl_users():
    """
    Return the user names of all twitter users being polled by the interns.

    Every PolledTimelineUsers row is read and its user_name collected into a
    list, which is returned.
    """
    eleanor_logger.debug('Getting listing of tracked twitter users')
    user_names = []
    with GetDBSession() as db_session:
        polled_users = db_session.query(twitter_models.PolledTimelineUsers)
        user_names.extend(polled.user_name for polled in polled_users)
    return user_names
Beispiel #5
0
def begin_tracking_twitter_user(username):
    """
    Add a twitter user to the database so that it gets tracked.

    A PolledTimelineUsers row is created for the user and committed.

    Arguments:
    username -- Twitter username/screen_name to be added, without the
    leading '@'. For example, to start polling '@NASA':
    begin_tracking_twitter_user('NASA')
    """
    polled_user = twitter_models.PolledTimelineUsers(user_name=username)
    with GetDBSession() as session:
        session.add(polled_user)
        session.commit()
    eleanor_logger.debug('Adding twitter user %s to be tracked', username)
Beispiel #6
0
def search_count_of_user_tweets_on_day(username, date, search_term):
    """Count a user's tweets containing search_term on a given day.

    Arguments:
    username -- tweeter_user_name to restrict the search to.
    date -- value understood by date_parse; only its year, month and day
    are used.
    search_term -- text that must be contained in the tweet's written text.

    Returns a dict of the form
    ``{username: {search_term: <count>, 'date': 'YYYY-MM-DD'}}``.
    """
    day = date_parse(date)
    # Half-open interval [start, end) covering the whole calendar day, so a
    # tweet posted exactly at midnight is counted for the day it starts.
    start = datetime(year=day.year, month=day.month, day=day.day)
    end = start + timedelta(days=1)
    with GetDBSession() as db_session:
        user_query = db_session.query(twitter_models.TwitterSource).filter(
            twitter_models.TwitterSource.tweeter_user_name == username).join(
                twitter_models.TwitterSource.text_source, aliased=True).filter(
                    and_(models.TextSource.time_posted >= start,
                         models.TextSource.time_posted < end,
                         models.TextSource.written_text.contains(search_term)))
        tweet_count = user_query.count()

    return {
        username: {
            search_term: tweet_count,
            'date': start.strftime('%Y-%m-%d')
        }
    }
Beispiel #7
0
def last_twitter_user_entry_id(screen_name):
    """
    Returns the highest tweet id associated with screen_name, otherwise
    returns None.

    Arguments:
    screen_name -- Twitter user_name/screen_name to check for.
    """
    with GetDBSession() as db_session:
        if not is_twitter_user_in_interns(screen_name):
            return None

        # TODO: retweets are not excluded here yet; filter on is_retweet if
        # only original tweets should be considered.
        latest_row = db_session.query(
            twitter_models.TwitterSource.tweet_id
        ).filter_by(tweeter_user_name=screen_name).order_by(
            desc(twitter_models.TwitterSource.tweet_id)).first()
        if latest_row is None:
            # The membership check above runs in its own session, so guard
            # against the query still coming back empty.
            return None

        last_tweet_id = latest_row.tweet_id
        eleanor_logger.debug('Last tweet id from twitter user %s is %s',
                             screen_name, last_tweet_id)
        return last_tweet_id
Beispiel #8
0
def is_twitter_user_in_interns(screen_name):
    """
    Checks to see if a twitter user exists within the database. Returns True
    if at least one TwitterSource row has screen_name as its
    tweeter_user_name, else returns False.

    For example checking to see if the user '@NASA' exists within the database
    the method would be called like so: is_twitter_user_in_interns('NASA')

    Arguments:
    screen_name -- Twitter user_name/screen_name to check for.
    """
    with GetDBSession() as db_session:
        # Run the lookup while the session is open (the query used to be
        # iterated only after the ``with`` block had exited) and let the
        # database do the matching instead of loading every distinct name.
        matching_row = db_session.query(
            twitter_models.TwitterSource.tweeter_user_name).filter(
                twitter_models.TwitterSource.tweeter_user_name ==
                screen_name).first()
    is_user_tracked = matching_row is not None
    eleanor_logger.debug(
        'Twitter username %s is currently being tracked by interns is: %s',
        screen_name, is_user_tracked)
    return is_user_tracked