コード例 #1
0
ファイル: views.py プロジェクト: kahramanbayraktar/sentiwent
def bigram(request, search_term=None):
    """Render a bigram (2-gram) co-occurrence network for matching tweets.

    Fetches tweet entities from the database, computes a co-occurrence
    matrix over them, and renders the result as a pyvis network graph
    embedded in the 'tweets/bigram.html' template.
    """
    start = datetime.now()

    search_term = extract_search_term(request, search_term)
    start_date = request.POST.get('start_date', None)
    end_date = request.POST.get('end_date', None)
    tweet_count = request.POST.get('tweet_count', None)
    if tweet_count:
        # Form values arrive as formatted strings, e.g. "1,000".
        tweet_count = int(tweet_count.replace(',', ''))
    result_size = request.POST.get('result_size', None)
    if result_size:
        result_size = int(result_size.replace(',', ''))

    db = Database()
    vis = Visualization()
    cleaner = DataCleaner()
    analysis = Analysis()

    tweets = pd.DataFrame()
    if search_term or request.method == "POST":
        # NOTE(review): tweet_count is passed twice here; sibling views call
        # db.get_tweets(search_term, tweet_count, start_date, end_date) with
        # four arguments -- confirm get_tweet_entities' expected signature.
        tweets = db.get_tweet_entities(search_term, tweet_count, start_date,
                                       end_date, tweet_count)

    html = ''
    first_tweet_date = ''
    last_tweet_date = ''

    if tweets is not None and not tweets.empty:
        # Exclude the search term itself plus the cleaner's stock noise tokens
        # so they do not dominate the graph.
        excluded_terms = [search_term]
        excluded_terms += cleaner.excluded_tokens
        df_coocmatrix, df_cooc = analysis.cooccurrence(tweets,
                                                       'entities',
                                                       result_size,
                                                       excluded_terms,
                                                       ngram=(2, 2))

        html = vis.network_pyvis(df_cooc)

        # Report the actual counts of what was analyzed/returned.
        tweet_count = len(tweets.index)
        result_size = len(df_cooc.index)
        first_tweet_date = datetime.strftime(tweets.min()['created_at'],
                                             '%m/%d/%Y %H:%M')
        last_tweet_date = datetime.strftime(tweets.max()['created_at'],
                                            '%m/%d/%Y %H:%M')

    context = {
        'search_term': search_term if search_term else '-',
        'graphic': html,
        'tweet_count': tweet_count,
        'result_size': result_size,
        # Fall back to '-' like the sibling views (frequency, sentiment) do.
        'first_tweet_date': first_tweet_date if first_tweet_date else '-',
        'last_tweet_date': last_tweet_date if last_tweet_date else '-',
        'time_elapsed': (datetime.now() - start).seconds,
    }

    return render(request, 'tweets/bigram.html', context)
コード例 #2
0
ファイル: views.py プロジェクト: kahramanbayraktar/sentiwent
def frequency(request, search_term=None):
    """Render a term-frequency chart for tweets matching the search term."""
    started_at = datetime.now()

    search_term = extract_search_term(request, search_term)
    start_date = request.POST.get('start_date', None)
    end_date = request.POST.get('end_date', None)

    def _parse_count(raw):
        # Form values arrive as formatted strings, e.g. "1,000";
        # None / empty values pass through untouched.
        return int(raw.replace(',', '')) if raw else raw

    tweet_count = _parse_count(request.POST.get('tweet_count', None))
    result_size = _parse_count(request.POST.get('result_size', None))

    db = Database()
    analysis = Analysis()
    vis = Visualization()
    cleaner = DataCleaner()

    tweets = pd.DataFrame()
    if search_term or request.method == "POST":
        tweets = db.get_tweets(search_term, tweet_count, start_date, end_date)

    script, div = '', ''
    first_tweet_date, last_tweet_date = '', ''

    if tweets is not None and not tweets.empty:
        # Keep the search term itself (and stock noise tokens) out of the chart.
        stop_words = [search_term]
        stop_words.extend(cleaner.excluded_tokens)
        freq_df = analysis.frequency(df=tweets,
                                     count=result_size,
                                     excluded_words=stop_words)

        script, div = vis.frequency(freq_df, search_term)

        tweet_count = len(tweets.index)
        result_size = len(freq_df.index)
        date_fmt = '%m/%d/%Y %H:%M'
        first_tweet_date = datetime.strftime(tweets.min()['created_at'],
                                             date_fmt)
        last_tweet_date = datetime.strftime(tweets.max()['created_at'],
                                            date_fmt)

    context = {
        'search_term': search_term or '-',
        'script': script,
        'div': div,
        'tweet_count': tweet_count,
        'result_size': result_size,
        'first_tweet_date': first_tweet_date or '-',
        'last_tweet_date': last_tweet_date or '-',
        'time_elapsed': (datetime.now() - started_at).seconds,
    }

    return render(request, 'tweets/frequency.html', context)
コード例 #3
0
def update_cooc():
    """Recompute and persist co-occurrence data for every auto-search term."""
    db = Database()
    analysis = Analysis()

    for _, search_row in db.get_all_autosearches().iterrows():
        term = search_row.search_term
        entity_tweets = db.get_tweet_entities(term)

        matrix_df, cooc_df = analysis.cooccurrence(entity_tweets,
                                                   col='entities',
                                                   excluded_words=[])
        db.upsert_coocmatrix(term, matrix_df)
        db.upsert_cooc(term, cooc_df)
コード例 #4
0
ファイル: views.py プロジェクト: kahramanbayraktar/sentiwent
def hashtag_network(request, search_term=None):
    """Render a hashtag co-occurrence network for matching tweets.

    Bug fixes versus the original:
    - the result guard tested an always-empty placeholder DataFrame (``df``)
      instead of the fetched ``tweets``, so the analysis branch never ran;
    - ``tweets`` could be unbound on a plain GET with no search term;
    - the rendered pyvis HTML was computed but never passed to the template
      (now exposed as 'graphic', matching the bigram view).
    """
    start = datetime.now()

    search_term = extract_search_term(request, search_term)
    start_date = request.POST.get('start_date', None)
    end_date = request.POST.get('end_date', None)
    tweet_count = request.POST.get('tweet_count', None)
    if tweet_count:
        # Form values arrive as formatted strings, e.g. "1,000".
        tweet_count = int(tweet_count.replace(',', ''))
    hashtag_count = request.POST.get('hashtag_count', None)
    if hashtag_count:
        hashtag_count = int(hashtag_count.replace(',', ''))

    db = Database()
    analysis = Analysis()
    vis = Visualization()

    tweets = pd.DataFrame()
    if search_term or request.method == "POST":
        tweets = db.get_tweets(search_term, tweet_count, start_date, end_date)

    script = ''
    div = ''
    html = ''

    if tweets is not None and not tweets.empty:
        df_coocmatrix, df_cooc = analysis.cooccurrence(tweets,
                                                       'hashtag',
                                                       hashtag_count,
                                                       excluded_terms=None)

        db.upsert_coocmatrix(search_term, df_coocmatrix)
        db.upsert_cooc(search_term, df_cooc)

        html = vis.network_pyvis(df_cooc)

    context = {
        'search_term': search_term if search_term else '-',
        'script': script,
        'div': div,
        'graphic': html,
        'hashtag_count': hashtag_count,
        'first_tweet_date': start_date if start_date else '-',
        'last_tweet_date': end_date if end_date else '-',
        'time_elapsed': (datetime.now() - start).seconds,
    }

    return render(request, 'tweets/hashtag.html', context)
コード例 #5
0
ファイル: views.py プロジェクト: kahramanbayraktar/sentiwent
def sentiment(request, search_term=None):
    """Render a daily sentiment plot for tweets matching the search term.

    Fix versus the original: guard against ``db.get_tweets`` returning
    ``None`` (the sibling views bigram/frequency already check this) so
    ``tweets.empty`` cannot raise AttributeError.
    """
    start = datetime.now()

    search_term = extract_search_term(request, search_term)
    start_date = request.POST.get('start_date', None)
    end_date = request.POST.get('end_date', None)
    tweet_count = request.POST.get('tweet_count', None)
    if tweet_count:
        # Form values arrive as formatted strings, e.g. "1,000".
        tweet_count = int(tweet_count.replace(',', ''))

    db = Database()
    analysis = Analysis()
    vis = Visualization()

    tweets = pd.DataFrame()
    if search_term or request.method == "POST":
        tweets = db.get_tweets(search_term, tweet_count, start_date, end_date)

    script = ''
    div = ''
    first_tweet_date = ''
    last_tweet_date = ''

    if tweets is not None and not tweets.empty:
        computed_tweets = analysis.sentiment(tweets)
        script, div = vis.plot_daily_sentiment(computed_tweets)

        tweet_count = len(tweets.index)
        first_tweet_date = datetime.strftime(tweets.min()['created_at'],
                                             '%m/%d/%Y %H:%M')
        last_tweet_date = datetime.strftime(tweets.max()['created_at'],
                                            '%m/%d/%Y %H:%M')

    context = {
        'search_term': search_term if search_term else '-',
        'tweets': tweets,
        'script': script,
        'div': div,
        'tweet_count': tweet_count,
        'first_tweet_date': first_tweet_date if first_tweet_date else '-',
        'last_tweet_date': last_tweet_date if last_tweet_date else '-',
        'time_elapsed': (datetime.now() - start).seconds,
    }

    return render(request, 'tweets/sentiment.html', context)
コード例 #6
0
    def insert_tweets(self, df):
        """Bulk-insert the tweets in *df* into tweets_tweet, computing sentiment per row.

        Each row must expose ``id``, ``created_at`` (raw Twitter format),
        ``text`` and ``entities``. Best-effort by design: any failure is
        logged and swallowed rather than propagated to the caller.
        """
        logger = logging.getLogger(__name__)

        analysis = Analysis()
        func = Functions()

        sql = "INSERT INTO tweets_tweet (tweet_id, unix, created_at, tweet, sentiment, entities, entities_extracted) VALUES(%s, %s, %s, %s, %s, %s, %s);"

        try:
            with connection.cursor() as cursor:
                for _, row in df.iterrows():
                    sentiment = analysis.polarity(row.text)

                    # Normalize Twitter's created_at string into a DB-friendly value.
                    created_at = func.convert_twitter_date(row.created_at)

                    # NOTE(review): the raw row.created_at is stored in the 'unix'
                    # column -- confirm that column really expects the unconverted
                    # Twitter timestamp rather than an epoch value.
                    cursor.execute(sql, (row.id, row.created_at, created_at, row.text, sentiment, row.entities, 1))
                connection.commit()
        except Exception as error:
            print(error)
            # logger.exception records the stack trace along with the message.
            logger.exception(error)
        finally:
            if connection is not None:
                connection.close()
コード例 #7
0
ファイル: views.py プロジェクト: kahramanbayraktar/sentiwent
def search(request, search_term=None):
    """Fetch new tweets for a search term, enrich with entities, and store them.

    The term and options are read from the query string (the ``search_term``
    URL parameter is unconditionally overridden by the GET value). Only
    tweets newer than the highest stored tweet id are fetched. After
    inserting the batch (and optionally saving the search), redirects to
    the index page.

    Cleanup versus the original: removed the unused ``Analysis`` and
    ``Visualization`` instances and the unused ``context`` dict.
    """
    api = TwitterApi()
    db = Database()
    func = Functions()

    search_term = request.GET.get('search_term')
    save_search = request.GET.get('save_search')
    # Read but currently unused options -- kept to document the form fields.
    include_synonyms = request.GET.get('include_synonyms')
    save_tweets = request.GET.get('save_tweets')
    repeatable = request.GET.get('repeatable')

    if not search_term:
        return render(request, 'tweets/search.html')

    # TODO: Duplicate code: See background_task.py collect_data.
    # Only fetch tweets newer than what is already stored.
    max_id = db.get_max_tweet_id()

    tweets = api.search(search_term, max=10, since_id=max_id)

    if len(tweets) > 0:
        df = func.to_df_from_json(tweets)

        # NOTE(review): loading the spaCy model on every request is expensive;
        # consider caching it at module level.
        nlp = spacy.load('en_core_web_sm')
        df['entities'] = df['text'].apply(
            lambda x: (', '.join(ent.text for ent in nlp(x).ents)))

        db.insert_tweets(df)

        if save_search == 'on':
            db.insert_search(search_term, request.user.id)

    url = reverse('index')
    return redirect(url)
コード例 #8
0
ファイル: views.py プロジェクト: kahramanbayraktar/sentiwent
def hashtag(request, search_term=None):
    """Render a hashtag-frequency chart for tweets matching the search term.

    Cleanup versus the original: removed the unused ``Analysis`` and
    ``DataCleaner`` instances, and guarded against ``db.get_hashtags``
    returning ``None`` (consistent with the sibling views' None checks).
    """
    start = datetime.now()

    search_term = extract_search_term(request, search_term)
    start_date = request.POST.get('start_date', None)
    end_date = request.POST.get('end_date', None)
    hashtag_count = request.POST.get('hashtag_count', None)
    if hashtag_count:
        # Form values arrive as formatted strings, e.g. "1,000".
        hashtag_count = int(hashtag_count.replace(',', ''))

    db = Database()
    vis = Visualization()

    df = pd.DataFrame()
    if search_term or request.method == "POST":
        df = db.get_hashtags(search_term, max=hashtag_count)

    script = ''
    div = ''

    if df is not None and not df.empty:
        script, div = vis.frequency(df, search_term)

        # Report the actual number of hashtags plotted.
        hashtag_count = len(df.index)

    context = {
        'search_term': search_term if search_term else '-',
        'script': script,
        'div': div,
        'hashtag_count': hashtag_count,
        'first_tweet_date': start_date if start_date else '-',
        'last_tweet_date': end_date if end_date else '-',
        'time_elapsed': (datetime.now() - start).seconds,
    }

    return render(request, 'tweets/hashtag.html', context)