Example #1
def save_tweets(tweets, search_term, location_search_term=None):
    """
    Saves the given tweet data in the MongoDB database.

    :param tweets: Tweet data
    :param search_term: Search term entered by user
    :param location_search_term: Location search term entered by user (optional)
    """

    try:
        # Cycle through list of processed Tweets
        for status in tweets:
            tweet_id = status['tweet_id']
            # Skip this iteration if the tweet already exists in the db
            if Tweet.objects(tweet_id=tweet_id).count() >= 1:
                continue
            # Fields common to every record
            fields = dict(
                tweet_id=status['tweet_id'],
                tweet_time=status['created_at'],
                tweet_text=status['text'],
                tweet_user=status['user'],
                tweet_user_fullname=status['user_fullname'],
                profile_image_url=status['profile_image_url'],
                sentiment_type=status['sentiment'],
                sentiment_score=status['sentimentScore'],
                keyword_search_term=search_term
            )
            # Location-restricted searches also carry geo data
            if location_search_term:
                fields.update(
                    location_geo=status['location_geo'],
                    location_address=status['location_address'],
                    location_search_term=location_search_term
                )
            # Define the record and save it to the db
            Tweet(**fields).save()
    except Exception:
        raise Exception("Database error")
Example #2
def search_keyword(keyword, count, location=None):
    """
    Performs a Twitter search for a given keyword.
    Will restrict the keyword search to within a 10-mile radius of the given location (if provided).

    :param keyword: Keyword to search
    :param count: Number of Tweets to search for
    :param location: Location object from geopy to restrict results to (optional)
    :return: List of dictionaries containing parsed Tweet data from search
    """

    # Perform OAuth connection to Twitter, creates instance of Twython
    twitter = twitter_auth()

    is_location_search = False
    in_db = False

    # List to store our data
    tweets = []

    # Attempt to query Twitter REST API using Twython
    try:
        if location is None:
            search_result = twitter.search(q=keyword, lang="en", count=count)
        else:
            is_location_search = True
            search_result = twitter.search(q=keyword, lang="en", geocode="%s,%s,10mi" % (location.latitude, location.longitude), count=count)
    except Exception:
        raise Exception("No Twitter results returned")

    # Cycle through results and parse the data we want, save as dictionary and store in 'tweets' list
    for status in search_result['statuses']:
        tweet_id = status['id']
        # Check if Tweet is already in DB
        id_query = len(Tweet.objects(tweet_id=tweet_id))
        if id_query >= 1:
            in_db = True
            continue
        tweet = {}
        tweet['tweet_id'] = tweet_id
        tweet['text'] = status['text'].strip()
        tm = status['created_at']
        tm = datetime.strptime(tm, '%a %b %d %H:%M:%S +0000 %Y')
        tweet['created_at'] = tm
        tweet['user'] = status['user']['screen_name'].strip()
        tweet['user_fullname'] = status['user']['name']
        tweet['profile_image_url'] = status['user']['profile_image_url']
        if is_location_search:
            tweet['location_geo'] = {"latitude": location.latitude, "longitude": location.longitude}
            tweet['location_address'] = location.address
        # Append parsed tweet data dictionary to results list
        tweets.append(tweet)

    # If no search results returned and data is not in our database, raise exception
    if not tweets and not in_db:
        raise Exception("No Twitter results returned")

    return tweets
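
search_keyword() expects location to behave like a geopy location object exposing latitude, longitude and address attributes (the same attributes the search() view later on this page reads). A minimal usage sketch, assuming geopy's Nominatim geocoder; the user_agent string and the search terms are placeholders:

from geopy.geocoders import Nominatim

geolocator = Nominatim(user_agent="tweetvibe-example")  # placeholder user agent
# geocode() returns an object with .latitude/.longitude/.address, or None if the place cannot be resolved
location = geolocator.geocode("Dublin, Ireland")

# Plain search vs. location-restricted search
tweets = search_keyword("python", count=50)
local_tweets = search_keyword("python", count=50, location=location)
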
Example #3
    def delete(self, username):
        context = self.get_context(username)
        muse = context.get('muse')

        # Fetch and clear out this user's tweets.
        tweets = Tweet.objects(username=username)
        for tweet in tweets:
            tweet.delete()

        muse.delete()

        return jsonify({'success': True})
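
Deleting inside the loop issues one query per tweet. MongoEngine querysets can also delete in bulk, which does the same work in a single query as long as no per-document delete signals or cascade rules need to fire. A sketch of the equivalent handler:

    def delete(self, username):
        context = self.get_context(username)
        muse = context.get('muse')

        # Remove this user's tweets in a single bulk query
        Tweet.objects(username=username).delete()

        muse.delete()

        return jsonify({'success': True})
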
Example #4
def search_user(screen_name, count):
    """
    Performs a Twitter search for a given username.

    :param screen_name: Username of Twitter user to search for
    :param count: Number of the user's Tweets to return
    :return: List of dictionaries containing parsed Tweet data from search
    """

    # Perform OAuth connection to Twitter, creates instance of Twython
    twitter = twitter_auth()

    in_db = False

    # List to store our results
    tweets = []

    # Attempt to query Twitter REST API using Twython
    try:
        search_result = twitter.get_user_timeline(screen_name=screen_name, count=count)
    except Exception:
        raise Exception("No Twitter results returned")

    # If the timeline is empty, there is nothing to analyze
    if not search_result:
        raise Exception("No Twitter results returned")

    if search_result[0]['user']['lang'] != "en":
        # If not English, we can't perform sentiment analysis (limitation of AlchemyAPI)
        raise Exception("Not an English language user")

    # Cycle through results and parse the data we want, save as dictionary and store in 'tweets' list
    for status in search_result:
        tweet_id = status['id']
        # Check if Tweet is already in DB
        id_query = len(Tweet.objects(tweet_id=tweet_id))
        if id_query >= 1:
            in_db = True
            continue
        tweet = {}
        tweet['tweet_id'] = tweet_id
        tweet['text'] = status['text'].strip()
        tm = status['created_at']
        tm = datetime.strptime(tm, '%a %b %d %H:%M:%S +0000 %Y')
        tweet['created_at'] = tm
        # Strip the leading "@" from the handle before storing the username
        tweet['user'] = screen_name[1:]
        tweet['user_fullname'] = status['user']['name']
        tweet['profile_image_url'] = status['user']['profile_image_url']
        # Add parsed tweet data dictionary to results list
        tweets.append(tweet)

    # If no search results returned and data is not in our database, raise exception
    if not tweets and not in_db:
        raise Exception("No Twitter results returned")

    return tweets
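
The search() view further down passes the handle with its leading "@" intact (it checks keyword[0] == "@" before calling this function), and search_user relies on that when it strips the first character for the stored username. A minimal usage sketch under that assumption; the handle is a placeholder and valid credentials in twitter_auth() are required:

# Hypothetical call with a placeholder handle
recent = search_user("@example_handle", count=20)
for t in recent:
    print(t['created_at'], t['user'], t['text'][:60])
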
Example #5
def delete_obsolete_tweets():
    # Remove every stored tweet dated before the start of the current day
    max_datetime = datetime.now().date()
    Tweet.objects(datetime__lt=max_datetime).delete()
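
delete_obsolete_tweets() drops everything with a datetime earlier than the start of the current day, but how it gets triggered is not shown here. One common way to run this kind of cleanup periodically is with APScheduler; the schedule below is a sketch, not taken from the original project:

from apscheduler.schedulers.background import BackgroundScheduler

scheduler = BackgroundScheduler()
# Hypothetical schedule: run the cleanup every day just after midnight
scheduler.add_job(delete_obsolete_tweets, 'cron', hour=0, minute=5)
scheduler.start()
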
Example #6
def search():
    """
    Handles the POST request from the index page search form, processes the form and calls manager functions accordingly.
    Passes the results to the rendered search template, or redirects the user to the homepage if an exception occurs.

    :return: Rendered template with search results
    """

    from app import manager
    from app import helper
    import time

    page = "search"

    if request.method == 'POST':
        # Get form data with trailing spaces removed
        keyword = request.form['keyword'].strip()
        count = request.form['count'].strip()
        location = request.form['location'].strip()

        # Check if search term has been provided
        if not keyword:
            flash("Blessed are the forgetful! Looks like you didn't enter a search parameter.")
            return redirect(url_for('index'))

        # If user hasn't specified a number of Tweets to search for, use default (15)
        if not count:
            count = config.TWEET_SEARCH_LIMIT_DEFAULT

        # If a count was provided, make sure it is numeric
        if not str(count).isdigit():
            flash("Please enter a valid number of Tweets to search (1-100)")
            return redirect(url_for('index'))

        # Check that the count does not exceed the max (100); if it does, cap it at the max
        if int(count) > config.TWEET_SEARCH_LIMIT_MAX:
            count = config.TWEET_SEARCH_LIMIT_MAX

        # Check if the supplied count is zero or negative
        if int(count) <= 0:
            flash("Please enter a valid number of Tweets to search (1-100)")
            return redirect(url_for('index'))

        user_search = False
        location_search_term = None

        # Get time
        t1 = time.time()

        # Attempt to perform search, analysis and storage
        try:
            if keyword and not location:
                keyword = keyword.lower()
                # Check if searching for a user
                if keyword[0] == "@":
                    user_search = True
                    manager.analysis_supervisor(manager.search_user(keyword, count), keyword)
                else:
                    manager.analysis_supervisor(manager.search_keyword(keyword, count), keyword)
            if keyword and location:
                keyword = keyword.lower()
                location = location.lower()
                # Check if searching for a user
                if keyword[0] == "@":
                    user_search = True
                    manager.analysis_supervisor(manager.search_user(keyword, count), keyword)
                else:
                    location_search_term = location
                    location = manager.get_geo_info(location_search_term)
                    manager.analysis_supervisor(manager.search_keyword(keyword, count, location), keyword, location_search_term)
        except Exception as e:
            # Exception handling for any errors that may occur in retrieving / analyzing / saving data
            e = str(e)
            if e == "No Twitter results returned":
                if location_search_term:
                    flash("Sorry, Twitter returned no results for: \"" + keyword + "\" near " + "\"" +
                          location_search_term + "\"")
                else:
                    flash("Sorry, Twitter returned no results for: \"" + keyword + "\"")
            elif e == "Twython auth error":
                flash("Oops, it appears we're having trouble connecting to Twitter. Please try again later.")
            elif e == "AlchemyAPI auth error":
                flash("Oops, it appears we're having trouble connecting to our language processing API. "
                      "Please try again later.")
            elif e == "AlchemyAPI error":
                flash("Oops, it appears we had trouble analyzing one or more of the results for that search")
            elif e == "Not an English language user":
                flash("Sorry, it appears that account " + keyword + " is not an English language user. Presently, "
                      "Tweetvibe can only analyze English tweets.")
            elif e == "Location error":
                flash("Oops, it appears we had trouble identifying location " + "\"" + location_search_term + "\"")
            elif e == "Database error":
                flash("Oops, it appears we are experiencing a problem interacting with our database.")
            else:
                flash("Oops, something went wrong. A team of highly trained engineer monkeys have been dispatched to"
                      " fix the problem. Please try again later.")
            # Redirect to index with flash message
            return redirect(url_for('index'))

        # Attempt to get results from database
        try:
            if location:
                results = Tweet.objects(Q(keyword_search_term=keyword) & Q(location_address=location.address)).order_by('-stored_at', '-tweet_time').limit(int(count))
            else:
                results = Tweet.objects(keyword_search_term=keyword).order_by('-stored_at', '-tweet_time').limit(int(count))
        except Exception:
            flash("Oops, it appears we are experiencing a problem querying our database.")
            return redirect(url_for('index'))

        try:
            search_aggregate = helper.aggregate_sentiment(results)
            search_avg = helper.get_query_sentiment_avg(results)
        except Exception:
            flash("Oops, something went wrong. A team of highly trained engineer monkeys have been dispatched to"
                  " fix the problem. Please try again later.")
            return redirect(url_for('index'))

        # Calculate time taken to perform search and analysis
        t2 = time.time()
        time_taken = t2 - t1

        if location:
            # Format location latitude/longitude to 2 decimal places
            longitude = "{:.2f}".format(float(location.longitude))
            latitude = "{:.2f}".format(float(location.latitude))
            hist_avg = helper.get_historical_sentiment_avg(keyword, location.address)
            hist_data = helper.get_historical_sentiment(keyword, location.address)
            hist_predominant_sentiment = helper.predominant_sentiment(hist_data)
            return render_template(
                "search.html",
                time_taken=time_taken,
                results=results,
                page=page,
                keyword=keyword,
                location_search_term=location_search_term,
                location=location.address,
                longitude=longitude,
                latitude=latitude,
                search_count=count,
                search_aggregate=search_aggregate,
                search_avg=search_avg,
                search_stats=helper.get_query_statistics(results, search_aggregate),
                dom_sentiment=hist_predominant_sentiment,
                hist_data=hist_data,
                hist_avg=hist_avg,
                overtime_data=helper.get_sentiment_overtime(keyword, location.address)
            )
        elif user_search:
            hist_avg = helper.get_historical_sentiment_avg(keyword)
            hist_data = helper.get_historical_sentiment(keyword)
            hist_predominant_sentiment = helper.predominant_sentiment(hist_data)
            return render_template(
                "search.html",
                results=results,
                time_taken=time_taken,
                page=page,
                user=keyword,
                search_count=count,
                search_aggregate=search_aggregate,
                search_avg=search_avg,
                search_stats=helper.get_query_statistics(results, search_aggregate),
                dom_sentiment=hist_predominant_sentiment,
                hist_data=hist_data,
                hist_avg=hist_avg,
                overtime_data=helper.get_sentiment_overtime(keyword)
            )
        else:
            hist_avg = helper.get_historical_sentiment_avg(keyword)
            hist_data = helper.get_historical_sentiment(keyword)
            hist_predominant_sentiment = helper.predominant_sentiment(hist_data)
            return render_template(
                "search.html",
                results=results,
                time_taken=time_taken,
                page=page,
                keyword=keyword,
                search_count=count,
                search_aggregate=search_aggregate,
                search_avg=search_avg,
                search_stats=helper.get_query_statistics(results, search_aggregate),
                dom_sentiment=hist_predominant_sentiment,
                hist_data=hist_data,
                hist_avg=hist_avg,
                overtime_data=helper.get_sentiment_overtime(keyword)
            )
    else:
        return redirect(url_for('index'))
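
The view above reads two limits from a config module that is not shown. Based only on the values mentioned in its own comments (default 15, maximum 100), that module might contain something like:

# config.py (sketch; values taken from the comments in the view above)
TWEET_SEARCH_LIMIT_DEFAULT = 15    # used when the count field is left empty
TWEET_SEARCH_LIMIT_MAX = 100       # hard cap applied to user-supplied counts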