def load_data_2(limit=250, split=0.7):
    """Load keyword-labeled tweets from MySQL and split into train/eval sets.

    Tweets for several tickers are labeled 'bullish' or 'bearish' by the
    prefix of their contents, plus pre-classified 'neutral' AMZN tweets.
    ``limit`` rows are sampled per class (with replacement, so classes
    smaller than ``limit`` are up-sampled) and the pooled data is split at
    fraction ``split``.

    Args:
        limit: number of rows sampled per category.
        split: fraction (0-1) of the sampled data used for training.

    Returns:
        ``((train_texts, train_cats), (eval_texts, eval_cats))`` where each
        ``cats`` entry is a dict of one-hot floats keyed by
        'Neutral'/'Bullish'/'Bearish'.
    """
    mysql_con = mysqlClient.mysql_connection()
    # NOTE(review): the MSFT bearish arm filters on 'short%'/'bear%' while
    # every other ticker uses 'down%'/'sell%' — confirm whether that
    # asymmetry is intentional.
    get_tweets_query = """
            SELECT contents COLLATE utf8mb4_unicode_ci, 'bullish'  FROM tweets.AMZN where contents like 'up%' or contents like 'buy%'
        union all
        SELECT contents COLLATE utf8mb4_unicode_ci, 'bullish'  FROM tweets.AMD where contents like 'up%' or contents like 'buy%'
        union all
        SELECT contents COLLATE utf8mb4_unicode_ci, 'bullish'  FROM tweets.AAPL where contents like 'up%' or contents like 'buy%'
        union all
        SELECT contents COLLATE utf8mb4_unicode_ci, 'bullish'  FROM tweets.GOOG where contents like 'up%' or contents like 'buy%'
        union all
        SELECT contents COLLATE utf8mb4_unicode_ci, 'bullish'  FROM tweets.FB where contents like 'up%' or contents like 'buy%'
        union all
        SELECT contents COLLATE utf8mb4_unicode_ci, 'bullish'  FROM tweets.MSFT where contents like 'up%' or contents like 'buy%'

        union all

        SELECT contents  COLLATE utf8mb4_unicode_ci, 'bearish'  FROM tweets.AMZN where contents like 'down%' or contents like 'sell%' or contents like 'crash%'
        union all
        SELECT contents  COLLATE utf8mb4_unicode_ci, 'bearish'  FROM tweets.AMD where contents like 'down%' or contents like 'sell%' or contents like 'crash%'
        union all
        SELECT contents  COLLATE utf8mb4_unicode_ci, 'bearish'  FROM tweets.AAPL where contents like 'down%' or contents like 'sell%' or contents like 'crash%'
        union all
        SELECT contents  COLLATE utf8mb4_unicode_ci, 'bearish'  FROM tweets.GOOG where contents like 'down%' or contents like 'sell%' or contents like 'crash%'
        union all
        SELECT contents  COLLATE utf8mb4_unicode_ci, 'bearish'  FROM tweets.FB where contents like 'down%' or contents like 'sell%' or contents like 'crash%'
                union all
        SELECT contents  COLLATE utf8mb4_unicode_ci, 'bearish'  FROM tweets.MSFT where contents like 'short%' or contents like 'bear%' or contents like 'crash%'

        union all

        SELECT contents COLLATE utf8mb4_unicode_ci, 'neutral'  FROM tweets.AMZN where CATEGORY like 'neutral%'
        """
    tweets = mysql_con.select_query(get_tweets_query)
    tweets_df = pd.DataFrame(list(tweets), columns=['contents', 'category'])
    # Sample with replacement so each class contributes exactly `limit` rows
    # even when fewer matching tweets exist in the database.
    training_df_bullish = tweets_df.query("category == 'bullish'").sample(
        limit, replace=True)
    training_df_bearish = tweets_df.query("category == 'bearish'").sample(
        limit, replace=True)
    training_df_neutral = tweets_df.query("category == 'neutral'").sample(
        limit, replace=True)
    train_data = pd.concat(
        [training_df_bullish, training_df_bearish,
         training_df_neutral]).values.tolist()
    texts, labels = zip(*train_data)
    # One-hot category dicts: exactly one of the three values is 1.0 per row.
    cats = [{
        'Neutral': float(y == 'neutral'),
        'Bullish': float(y == 'bullish'),
        'Bearish': float(y == 'bearish')
    } for y in labels]
    # Use a separate name for the cut index instead of clobbering the
    # `split` parameter, which made the code confusing to read.
    split_idx = int(len(train_data) * split)
    return (texts[:split_idx], cats[:split_idx]), (texts[split_idx:], cats[split_idx:])
# --- Example 2 (scraped sample separator) ---
import pandas as pd
import datetime
import numpy as np


def in_trading_hour(x):
    """Return True if time ``x`` lies in the window 14:30-21:00 (inclusive).

    The window is hard-coded to 14:30-21:00, i.e. US market hours expressed
    in UTC — presumably; confirm the intended timezone against the callers.
    The wrap-around branch handles the general case where the window would
    cross midnight (start > end), even though the current constants do not.
    """
    # The original placed this docstring after the assignments below, where
    # it was a no-op string statement rather than documentation.
    start = datetime.time(14, 30)
    end = datetime.time(21, 0)
    if start <= end:
        return start <= x <= end
    else:
        return start <= x or x <= end


# Pull each ticker's classified-tweet table into a pandas DataFrame,
# indexed by timestamp, for downstream feature engineering.
mysql_con = mysqlClient.mysql_connection()
mysql_con.init_engine('tweets_features')
engine = mysql_con.engine

# Column order must match SELECT * on the *_CLASSIFIED tables.
classified_columns = [
    'GUID', 'DATETIME', 'URL', 'CONTENTS', 'AUTHOR', 'NAME', 'COUNTRY',
    'STATE/REGION', 'CITY/URBAN_AREA', 'CATEGORY', 'EMOTION', 'SOURCE',
    'GENDER', 'POSTS', 'FOLLOWERS', 'FOLLOWING', 'POST_TITLES', 'POST_TYPE',
    'IMAGE_URL', 'BRAND', 'BULLISH', 'BEARISH', 'NEUTRAL',
]

for stocks in ['AAPL', 'AMD', 'AMZN', 'FB', 'GOOG', 'MSFT']:
    print(f'getting {stocks} data...')
    rows = mysql_con.select_query(
        f"SELECT * from tweets_features.{stocks}_CLASSIFIED;")
    data_df = pd.DataFrame(list(rows), columns=classified_columns)
    # Timestamp becomes the index; GUID carries no feature information.
    data_df.set_index('DATETIME', inplace=True)
    data_df.drop(columns=['GUID'], inplace=True)

    # TODO: Factorize location data and sum the count for each location as a feature
# --- Example 3 (scraped sample separator) ---
import json
import mysqlClient

db_helper = mysqlClient.mysql_connection()

# Load locally saved 5-minute OHLCV JSON dumps for each ticker and upsert
# the bars into stock_prices.<ticker> via REPLACE INTO.
for stock in ['AAPL', 'AMD', 'AMZN', 'FB', 'GOOG', 'MSFT']:
    file_path = f'stockDataCollector/5m_data/{stock}2.json'
    try:
        with open(file_path, 'r') as f:
            data_unload = json.load(f)
        prices = data_unload['Time Series (5min)']
        if not prices:
            # No bars: the original would have produced invalid SQL
            # ("... values ;") after trimming the trailing comma.
            continue
        # NOTE(review): values are interpolated straight into the SQL text.
        # Safe only while the JSON dumps are trusted; switch to a
        # parameterized executemany if they ever come from outside.
        value_rows = []
        for ts, bar in prices.items():
            value_rows.append(
                f"('{ts}', {bar['1. open']}, {bar['2. high']}, "
                f"{bar['3. low']}, {bar['4. close']}, {bar['5. volume']})"
            )
        insert_query = (
            "replace into stock_prices.{} values ".format(stock)
            + ",".join(value_rows) + ";"
        )
        print(insert_query)
        db_helper.commit_query(insert_query)
    except FileNotFoundError:
        # No dump for this ticker — skip it and keep processing the rest.
        print(f'no data file for {stock} at {file_path}; skipping')
    except (KeyError, json.JSONDecodeError) as exc:
        # Malformed dump: report it instead of silently discarding (the
        # original bare `except: pass` hid these failures entirely).
        print(f'bad data for {stock}: {exc}')
    except Exception as exc:
        # Preserve the original best-effort behavior for DB errors, but at
        # least surface what went wrong.
        print(f'failed to import {stock}: {exc}')