def bigram(request, search_term=None):
    """Render a bigram co-occurrence network for tweets matching *search_term*.

    Optional POST filters: ``start_date``, ``end_date``, ``tweet_count``
    (max tweets to fetch) and ``result_size`` (max co-occurrence pairs).
    Builds an entity bigram network via ``Analysis.cooccurrence`` and
    renders it with pyvis into ``tweets/bigram.html``.
    """
    start = datetime.now()
    search_term = extract_search_term(request, search_term)
    start_date = request.POST.get('start_date', None)
    end_date = request.POST.get('end_date', None)
    tweet_count = request.POST.get('tweet_count', None)
    if tweet_count:
        # Counts arrive as display strings like "1,000".
        tweet_count = int(tweet_count.replace(',', ''))
    result_size = request.POST.get('result_size', None)
    if result_size:
        result_size = int(result_size.replace(',', ''))

    db = Database()
    vis = Visualization()
    cleaner = DataCleaner()
    analysis = Analysis()

    tweets = pd.DataFrame()
    if search_term or request.method == "POST":
        # FIXME(review): tweet_count is passed twice; confirm what the fifth
        # positional argument of get_tweet_entities should be (result_size?).
        tweets = db.get_tweet_entities(search_term, tweet_count, start_date, end_date, tweet_count)

    html = ''
    first_tweet_date = ''
    last_tweet_date = ''
    if tweets is not None and not tweets.empty:
        # Exclude the search term itself plus the cleaner's stop tokens.
        excluded_terms = [search_term]
        excluded_terms += cleaner.excluded_tokens
        df_coocmatrix, df_cooc = analysis.cooccurrence(tweets, 'entities', result_size, excluded_terms, ngram=(2, 2))
        # db.upsert_coocmatrix(search_term, df_coocmatrix)
        # db.upsert_cooc(search_term, df_cooc)
        html = vis.network_pyvis(df_cooc)
        tweet_count = len(tweets.index)
        result_size = len(df_cooc.index)
        # Take min/max of the timestamp column directly: DataFrame-wide
        # min()/max() over mixed-type columns can drop or fail on
        # non-comparable data before the 'created_at' lookup.
        first_tweet_date = datetime.strftime(tweets['created_at'].min(), '%m/%d/%Y %H:%M')
        last_tweet_date = datetime.strftime(tweets['created_at'].max(), '%m/%d/%Y %H:%M')

    context = {
        'search_term': search_term if search_term else '-',
        'graphic': html,
        'tweet_count': tweet_count,
        'result_size': result_size,
        # '-' fallback matches the frequency/sentiment views.
        'first_tweet_date': first_tweet_date if first_tweet_date else '-',
        'last_tweet_date': last_tweet_date if last_tweet_date else '-',
        'time_elapsed': (datetime.now() - start).seconds,
    }
    return render(request, 'tweets/bigram.html', context)
def frequency(request, search_term=None):
    """Render a word-frequency chart for tweets matching *search_term*.

    Optional POST filters: ``start_date``, ``end_date``, ``tweet_count``
    (max tweets to fetch) and ``result_size`` (max words to plot).
    The Bokeh-style ``script``/``div`` pair from ``Visualization.frequency``
    is rendered into ``tweets/frequency.html``.
    """
    start = datetime.now()
    search_term = extract_search_term(request, search_term)
    start_date = request.POST.get('start_date', None)
    end_date = request.POST.get('end_date', None)
    tweet_count = request.POST.get('tweet_count', None)
    if tweet_count:
        # Counts arrive as display strings like "1,000".
        tweet_count = int(tweet_count.replace(',', ''))
    result_size = request.POST.get('result_size', None)
    if result_size:
        result_size = int(result_size.replace(',', ''))

    db = Database()
    analysis = Analysis()
    vis = Visualization()
    cleaner = DataCleaner()

    tweets = pd.DataFrame()
    if search_term or request.method == "POST":
        tweets = db.get_tweets(search_term, tweet_count, start_date, end_date)

    script = ''
    div = ''
    first_tweet_date = ''
    last_tweet_date = ''
    if tweets is not None and not tweets.empty:
        # Exclude the search term itself plus the cleaner's stop tokens.
        excluded_words = [search_term]
        excluded_words += cleaner.excluded_tokens
        df = analysis.frequency(df=tweets, count=result_size, excluded_words=excluded_words)
        script, div = vis.frequency(df, search_term)
        tweet_count = len(tweets.index)
        result_size = len(df.index)
        # Take min/max of the timestamp column directly: DataFrame-wide
        # min()/max() over mixed-type columns can drop or fail on
        # non-comparable data before the 'created_at' lookup.
        first_tweet_date = datetime.strftime(tweets['created_at'].min(), '%m/%d/%Y %H:%M')
        last_tweet_date = datetime.strftime(tweets['created_at'].max(), '%m/%d/%Y %H:%M')

    context = {
        'search_term': search_term if search_term else '-',
        'script': script,
        'div': div,
        'tweet_count': tweet_count,
        'result_size': result_size,
        'first_tweet_date': first_tweet_date if first_tweet_date else '-',
        'last_tweet_date': last_tweet_date if last_tweet_date else '-',
        'time_elapsed': (datetime.now() - start).seconds,
    }
    return render(request, 'tweets/frequency.html', context)
def update_cooc():
    """Recompute and persist entity co-occurrence data for every saved auto-search."""
    db = Database()
    analysis = Analysis()
    for _, autosearch in db.get_all_autosearches().iterrows():
        term = autosearch.search_term
        entities = db.get_tweet_entities(term)
        matrix, pairs = analysis.cooccurrence(entities, col='entities', excluded_words=[])
        db.upsert_coocmatrix(term, matrix)
        db.upsert_cooc(term, pairs)
def hashtag_network(request, search_term=None):
    """Render a hashtag co-occurrence network for tweets matching *search_term*.

    Optional POST filters: ``start_date``, ``end_date``, ``tweet_count``
    (max tweets to fetch) and ``hashtag_count`` (max hashtags to include).
    """
    start = datetime.now()
    search_term = extract_search_term(request, search_term)
    start_date = request.POST.get('start_date', None)
    end_date = request.POST.get('end_date', None)
    tweet_count = request.POST.get('tweet_count', None)
    if tweet_count:
        # Counts arrive as display strings like "1,000".
        tweet_count = int(tweet_count.replace(',', ''))
    hashtag_count = request.POST.get('hashtag_count', None)
    if hashtag_count:
        hashtag_count = int(hashtag_count.replace(',', ''))

    db = Database()
    analysis = Analysis()
    vis = Visualization()

    # Initialize so `tweets` is bound even when no search was submitted.
    tweets = pd.DataFrame()
    if search_term or request.method == "POST":
        tweets = db.get_tweets(search_term, tweet_count, start_date, end_date)

    script = ''
    div = ''
    html = ''
    # Bug fix: the original tested an always-empty placeholder DataFrame
    # (`df`), so the co-occurrence branch could never run; the fetched
    # `tweets` is what must be checked.
    if tweets is not None and not tweets.empty:
        df_coocmatrix, df_cooc = analysis.cooccurrence(tweets, 'hashtag', hashtag_count, excluded_terms=None)
        db.upsert_coocmatrix(search_term, df_coocmatrix)
        db.upsert_cooc(search_term, df_cooc)
        html = vis.network_pyvis(df_cooc)

    context = {
        'search_term': search_term if search_term else '-',
        'script': script,
        'div': div,
        # Bug fix: the rendered network was previously dropped on the floor.
        'graphic': html,
        'hashtag_count': hashtag_count,
        'first_tweet_date': start_date if start_date else '-',
        'last_tweet_date': end_date if end_date else '-',
        'time_elapsed': (datetime.now() - start).seconds,
    }
    return render(request, 'tweets/hashtag.html', context)
def sentiment(request, search_term=None):
    """Render a daily sentiment plot for tweets matching *search_term*.

    Optional POST filters: ``start_date``, ``end_date`` and ``tweet_count``
    (max tweets to fetch). Renders into ``tweets/sentiment.html``.
    """
    start = datetime.now()
    search_term = extract_search_term(request, search_term)
    start_date = request.POST.get('start_date', None)
    end_date = request.POST.get('end_date', None)
    tweet_count = request.POST.get('tweet_count', None)
    if tweet_count:
        # Counts arrive as display strings like "1,000".
        tweet_count = int(tweet_count.replace(',', ''))

    db = Database()
    analysis = Analysis()
    vis = Visualization()

    tweets = pd.DataFrame()
    if search_term or request.method == "POST":
        tweets = db.get_tweets(search_term, tweet_count, start_date, end_date)

    script = ''
    div = ''
    first_tweet_date = ''
    last_tweet_date = ''
    # None-guard added for consistency with the other views: get_tweets may
    # return None, and None.empty would raise AttributeError.
    if tweets is not None and not tweets.empty:
        computed_tweets = analysis.sentiment(tweets)
        script, div = vis.plot_daily_sentiment(computed_tweets)
        tweet_count = len(tweets.index)
        # Take min/max of the timestamp column directly: DataFrame-wide
        # min()/max() over mixed-type columns can drop or fail on
        # non-comparable data before the 'created_at' lookup.
        first_tweet_date = datetime.strftime(tweets['created_at'].min(), '%m/%d/%Y %H:%M')
        last_tweet_date = datetime.strftime(tweets['created_at'].max(), '%m/%d/%Y %H:%M')

    context = {
        'search_term': search_term if search_term else '-',
        'tweets': tweets,
        'script': script,
        'div': div,
        'tweet_count': tweet_count,
        'first_tweet_date': first_tweet_date if first_tweet_date else '-',
        'last_tweet_date': last_tweet_date if last_tweet_date else '-',
        'time_elapsed': (datetime.now() - start).seconds,
    }
    return render(request, 'tweets/sentiment.html', context)
def insert_tweets(self, df):
    """Insert the tweets in *df* into tweets_tweet, computing sentiment per row.

    Each row is expected to expose ``id``, ``created_at``, ``text`` and
    ``entities``. Sentiment comes from ``Analysis.polarity`` and the
    ``entities_extracted`` flag is always written as 1.
    Errors are logged; they do not propagate to the caller.
    """
    logger = logging.getLogger(__name__)
    analysis = Analysis()
    func = Functions()
    sql = "INSERT INTO tweets_tweet (tweet_id, unix, created_at, tweet, sentiment, entities, entities_extracted) VALUES(%s, %s, %s, %s, %s, %s, %s);"
    try:
        with connection.cursor() as cursor:
            for i, row in df.iterrows():
                sentiment = analysis.polarity(row.text)
                created_at = func.convert_twitter_date(row.created_at)
                # NOTE(review): the raw Twitter date goes into the `unix`
                # column while the converted value goes into `created_at` —
                # confirm `unix` is not meant to hold an epoch timestamp.
                cursor.execute(sql, (row.id, row.created_at, created_at, row.text, sentiment, row.entities, 1))
            connection.commit()
    except Exception as error:
        # logger.exception records the traceback; the bare print() the
        # original used is invisible in production logs.
        logger.exception("Failed to insert tweets: %s", error)
    finally:
        # NOTE(review): closing Django's shared connection forces a
        # reconnect on the next query; Django normally manages connection
        # lifetime itself — confirm this close is intentional.
        if connection is not None:
            connection.close()
def search(request, search_term=None):
    """Fetch new tweets for a search term from Twitter, store them, and redirect.

    GET parameters: ``search_term`` (required — otherwise the search form is
    rendered), ``save_search`` ('on' to persist the search for the user),
    plus ``include_synonyms``/``save_tweets``/``repeatable`` which are read
    but not yet acted on here.
    """
    api = TwitterApi()
    db = Database()
    func = Functions()

    search_term = request.GET.get('search_term')
    # Read but currently unused by this view.
    include_synonyms = request.GET.get('include_synonyms')
    save_search = request.GET.get('save_search')
    save_tweets = request.GET.get('save_tweets')
    repeatable = request.GET.get('repeatable')
    if not search_term:
        return render(request, 'tweets/search.html')

    # TODO: Duplicate code: See background_task.py collect_data.
    max_id = db.get_max_tweet_id()
    # Step 2: only fetch tweets newer than what we already have.
    tweets = api.search(search_term, max=10, since_id=max_id)
    # Step 3: extract named entities and persist the batch.
    if tweets:
        df = func.to_df_from_json(tweets)
        # NOTE(review): loading the spaCy model on every request is slow —
        # consider loading it once at module import time.
        nlp = spacy.load('en_core_web_sm')
        df['entities'] = df['text'].apply(
            lambda x: (', '.join(ent.text for ent in nlp(x).ents)))
        db.insert_tweets(df)
    # Step 4: optionally remember this search for the logged-in user.
    if save_search == 'on':
        db.insert_search(search_term, request.user.id)

    return redirect(reverse('index'))
def hashtag(request, search_term=None):
    """Render a hashtag frequency chart for tweets matching *search_term*.

    Optional POST filters: ``start_date``, ``end_date`` and
    ``hashtag_count`` (max hashtags to plot). Renders the Bokeh-style
    ``script``/``div`` pair into ``tweets/hashtag.html``.
    """
    start = datetime.now()
    search_term = extract_search_term(request, search_term)
    start_date = request.POST.get('start_date', None)
    end_date = request.POST.get('end_date', None)
    hashtag_count = request.POST.get('hashtag_count', None)
    if hashtag_count:
        # Counts arrive as display strings like "1,000".
        hashtag_count = int(hashtag_count.replace(',', ''))

    db = Database()
    vis = Visualization()

    df = pd.DataFrame()
    if search_term or request.method == "POST":
        df = db.get_hashtags(search_term, max=hashtag_count)

    script = ''
    div = ''
    # None-guard added for consistency with the other views: a None return
    # from get_hashtags would raise AttributeError on .empty.
    if df is not None and not df.empty:
        script, div = vis.frequency(df, search_term)
        hashtag_count = len(df.index)

    context = {
        'search_term': search_term if search_term else '-',
        'script': script,
        'div': div,
        'hashtag_count': hashtag_count,
        'first_tweet_date': start_date if start_date else '-',
        'last_tweet_date': end_date if end_date else '-',
        'time_elapsed': (datetime.now() - start).seconds,
    }
    return render(request, 'tweets/hashtag.html', context)