Code Example #1
from pandas_datareader.data import DataReader


def stock_autocorr(ticker):
    # 'data_source', 'start_date', 'drop_col', and 'years' are assumed to be
    # defined at module scope (with drop_col removing the raw OHLCV columns
    # so that only 'mean' remains)
    stock = DataReader(ticker, data_source, start_date)
    stock['mean'] = (stock['Open'] + stock['High'] + stock['Low'] +
                     stock['Close']) / 4
    stock_change = stock.drop(drop_col, axis=1)
    # Daily percentage change of the averaged price
    stock_change['pct_change'] = stock_change['mean'].pct_change()
    autocorr = stock_change['pct_change'].autocorr()
    print('Over the past {0} years, the autocorrelation of the {1} daily '
          'percentage change is: {2}'.format(years, ticker, autocorr))
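A minimal usage sketch; the module-level names below (data_source, years, start_date, drop_col) are assumptions made so the function is callable, not part of the original snippet:

from datetime import datetime, timedelta

data_source = 'yahoo'
years = 5
start_date = datetime.now() - timedelta(days=365 * years)
drop_col = ['Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close']
stock_autocorr('AAPL')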
Code Example #2
import pprint

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pandas_datareader.data import DataReader
from sklearn.metrics import mean_absolute_error
from sklearn.linear_model import LinearRegression

pd.set_option('display.width', 400)
pd.set_option('display.max_columns', 10)

beginDate = '2011-03-04'
endDate = '2021-03-04'
stock = 'AAPL'
df = DataReader(stock, 'yahoo', beginDate, endDate)
#df = pd.read_csv('MSFT.csv',index_col=0)
df['Close'] = df['Adj Close']
df = df.drop('Adj Close', axis=1)

# Moving averages with periods 5,10,20,50,100,200 days
for ma_period in [5, 10, 20, 50, 100, 200]:
    indicator_name = 'ma_%d' % (ma_period)
    df[indicator_name] = df['Close'].rolling(ma_period).mean()

# Bollinger bands (the 20-day moving average plus and minus 1 and 2 standard deviations)
ma20 = df['Close'].rolling(20).mean()
std20 = df['Close'].rolling(20).std()
df['Boll_Up_20_2'] = ma20 + 2 * std20
df['Boll_Down_20_2'] = ma20 - 2 * std20
df['Boll_Up_20_1'] = ma20 + std20
df['Boll_Down_20_1'] = ma20 - std20
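The snippet imports matplotlib but never plots; a quick sketch (the column choices are illustrative) of how the bands might be visualized:

df[['Close', 'ma_20', 'Boll_Up_20_2', 'Boll_Down_20_2']].plot(figsize=(12, 6))
plt.title(stock + ' close with 20-day Bollinger bands')
plt.show()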
Code Example #3
# note: if loading from csv using the line above, comment out the section below
#
# User input and load ticker data from yahoo finance
# beginDate and endDate are assumed to be defined above (see Code Example #2)
while True:
    try:
        ticker = input('Enter Stock Ticker: ').upper()
        df = DataReader(ticker, 'yahoo', beginDate, endDate)
        #df = yf.Ticker(ticker, start=beginDate, end=endDate)
    except Exception:
        print('\nStock Ticker Symbol does not exist!\n')
        continue
    break
#
# Use Adj Close instead of Close
df['Close'] = df['Adj Close']
df = df.drop(columns=['Adj Close', 'High', 'Low', 'Open'])
print(f'\n{df.describe()}\n')
df['Moving_av'] = df['Close'].rolling(window=50, min_periods=0).mean()
df['Moving_av'].plot()

i = 1
rate_increase_in_vol = [0]
rate_increase_in_close = [0]
while i < len(df):
    rate_increase_in_vol.append(df.iloc[i]['Volume'] - df.iloc[i - 1]['Volume'])
    rate_increase_in_close.append(df.iloc[i]['Close'] - df.iloc[i - 1]['Close'])
    i += 1
df['Increase_in_vol'] = rate_increase_in_vol
df['Increase_in_close'] = rate_increase_in_close
df['Increase_in_vol'].plot()
df['Increase_in_close'].plot()
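The explicit while loop above has a vectorized equivalent; this produces the same columns (diff() yields NaN for the first row, filled with 0 to match the loop's leading zeros):

df['Increase_in_vol'] = df['Volume'].diff().fillna(0)
df['Increase_in_close'] = df['Close'].diff().fillna(0)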
Code Example #4
    def __update(self):
        """Updates class attributes."""
        p, mv, rf = self.__prices, self.__mv, self.__rf
        # Select attributes different from 'None'
        li = [x for x in (p, mv, rf) if x is not None]

        # if there is no element in the list, i.e., if all attributes are 'None'
        if len(li) == 0:
            self.__date = None
        # if there is only one element not 'None' in the list, 'self.__date' should be equal to its index
        elif len(li) == 1:
            self.__date: np.ndarray = li[0].index.to_numpy()
        # if at least 2 attributes are not 'None', we must verify that rows match in length and in values
        else:
            # if lengths match (to prevent ValueError)
            if self.__check_index_length_match(li):
                # if length and values are the same
                if self.__check_index_values_match(li):
                    self.__date = li[0].index.to_numpy().copy()
                # if lengths are equal among each dataset index, but not the values
                else:
                    # if values do not match, we force them to take the same
                    print("Row lengths match, but their index values differ.")
                    self.__date = li[0].index.to_numpy().copy()
                    self.__make_indices_values_match()
                    assert self.__check_index_values_match(li)
            # if any length mismatch, we truncate all DataFrames or Series
            else:
                # Get the oldest date among the list of DataFrames
                min_date = min([df.index.min() for df in li])
                # If there is a risk-free rate series and it begins after the
                # other series, try to backfill it with the 3-month proxy.
                # 'and' short-circuits, so self.__rf.index[0] is never
                # evaluated when self.__rf is None (the original '&' would
                # evaluate both sides and raise)
                if (self.__rf is not None) and (self.__rf.index[0] > min_date):
                    # Get initial date of the risk-free rate series
                    end = rf.index[0]
                    # 3-Month Treasury Constant Maturity Rate (GS3M)
                    rf3m = DataReader('GS3M', 'fred', start=min_date,
                                      end=end).resample('MS').mean()
                    # Drop the last row to prevent overlapping with self.__rf.
                    # A timedelta of one month would not work here, as some
                    # months have 31 days while others have 30.
                    rf3m.drop(rf3m.tail(1).index, inplace=True)
                    rf3m.columns = rf.columns
                    rf3m = rf3m.div(100).div(12)
                    # Concatenate both risk-free rates pd.Series
                    rf_concat = pd.concat([rf3m, self.__rf], sort=True)
                    errmsg: str = f"Got {rf_concat.shape} shape, but ({len(li[0].index)}, 1) expected."
                    assert rf_concat.shape[1] == 1, errmsg
                    self.__rf = rf_concat
                    # Join both series in a sole one
                    # self.__rf = rf_concat.iloc[:, 0].add(rf_concat.iloc[:, 1], fill_value=0)
                else:
                    # Truncate rows of different length according to their dates
                    self.__truncate_rows()
                    # Verify if the rows were correctly truncated
                    not_none_attributes_list = self.__among_not_none_attributes()
                    err_message = "Rows were not correctly truncated"
                    assert self.__check_index_length_match(
                        not_none_attributes_list), err_message
                    # Update the 'self.__date' attribute with the first item
                    self.__date = not_none_attributes_list[0].index.to_numpy().copy()
                    # Propagate same indexes to the other datasets to force a perfect match
                    self.__make_indices_values_match()

                    # Verify that indices have same indexes
                    err_message = "Values do not match among not 'None' attributes."
                    assert self.__check_index_values_match(
                        self.__among_not_none_attributes()), err_message
                self.__update()
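The two index-check helpers referenced above are not shown in this snippet; a minimal sketch of what they might look like (the names are taken from the calls above, the bodies are assumed):

    @staticmethod
    def __check_index_length_match(datasets):
        # True when every DataFrame/Series in the list has the same row count
        return len({len(d.index) for d in datasets}) == 1

    @staticmethod
    def __check_index_values_match(datasets):
        # True when every index holds exactly the same values, in order
        first = datasets[0].index
        return all(d.index.equals(first) for d in datasets)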
Code Example #5
#df = pd.read_csv('XXXXXX.csv',index_col=0)
# note: if loading from csv using the line above, comment out the section below
#
# User input and load ticker data from yahoo finance
while True:
    try:
        ticker = input('Enter Stock Ticker: ').upper()
        df = DataReader(ticker, 'yahoo', beginDate, endDate)
    except Exception:
        print('\nStock Ticker Symbol does not exist!\n')
        continue
    break
#
# Use Adj Close instead of Close
df['Close'] = df['Adj Close']
df = df.drop(columns=['Adj Close', 'High', 'Low', 'Open'])

# Future Days - number of days to predict
modelData = df['Close'].to_frame()
futureDays = 30
modelData['Predict'] = modelData['Close'].shift(-futureDays)
# dropna() already removes the final `futureDays` rows (their 'Predict' is
# NaN), so no further slicing is needed
modelData = modelData.dropna()
print(modelData)
x = np.array(modelData.drop(['Predict'], axis=1))
#print(x)
y = np.array(modelData['Predict'])
#print(y)
#
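A hedged continuation: the snippet stops after building x and y, so this sketch (the split ratio and LinearRegression choice are assumptions) shows one way the arrays might be used:

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.25)
linear = LinearRegression().fit(xtrain, ytrain)
print(f'R^2 on held-out data: {linear.score(xtest, ytest):.3f}')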
Code Example #6
from datetime import datetime

import numpy as np
import pandas as pd
import requests
import yfinance as yf
from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay
from pandas_datareader.data import DataReader
from pytrends.request import TrendReq
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from statsmodels.tsa.arima.model import ARIMA


def stock_info(ticker):

    today = datetime.today()
    beginDate = '2020-12-01'
    endDate = datetime.today().strftime('%Y-%m-%d')

    script = "9ho5HG7o00PT-g"
    secret = "2CQTFbYyYp5aLEN7bHkKGO8X4E3YHQ"

    def df_from_response(res):
        # Flatten the listing returned by the Reddit search endpoint into rows
        rows = []
        for post in res.json()['data']['children']:
            rows.append({
                'subreddit': post['data']['subreddit'],
                'title': post['data']['title'],
                'selftext': post['data']['selftext'],
                'num_comments': post['data']['num_comments'],
                'upvote_ratio': post['data']['upvote_ratio'],
                'date': datetime.fromtimestamp(
                    post['data']['created_utc']).strftime('%Y-%m-%d'),
                'ups': post['data']['ups'],
                'downs': post['data']['downs'],
                'score': post['data']['score'],
                'kind': post['kind'],
                'id': post['data']['id'],
            })
        return pd.DataFrame(rows)

    auth = requests.auth.HTTPBasicAuth(script, secret)
    data = {
        'grant_type': 'password',
        'username': '******',
        'password': '******'
    }

    headers = {'User-Agent': 'Final_Project/0.0.1'}

    request = requests.post('https://www.reddit.com/api/v1/access_token',
                            auth=auth,
                            data=data,
                            headers=headers)
    token = f"bearer {request.json()['access_token']}"
    headers = {**headers, **{'Authorization': token}}

    posts = pd.read_csv("trimmed_posts.csv")
    selected_cols = ['title', 'selftext']

    df = DataReader(ticker, 'yahoo', beginDate, endDate)
    df['Close'] = df['Adj Close']
    df = df.drop(columns=['Adj Close', 'High', 'Low', 'Open'])
    modelData = df['Close'].to_frame()
    # Rolling means over several windows (computed here but not used again
    # in this snippet)
    five_rolling = modelData.rolling(window=5).mean()
    ten_rolling = modelData.rolling(window=10).mean()
    twenty_rolling = modelData.rolling(window=20).mean()
    fifty_rolling = modelData.rolling(window=50).mean()
    hundred_rolling = modelData.rolling(window=100).mean()

    futureDays = 10
    modelData['Target'] = modelData['Close'].shift(-futureDays)

    X = np.array(modelData.drop(['Target'], axis=1))[:-futureDays]
    y = np.array(modelData['Target'])[:-futureDays]

    Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.25)
    Xfuture = modelData.drop(['Target'], axis=1)[:-futureDays]
    Xfuture = Xfuture.tail(futureDays)
    Xfuture = np.array(Xfuture)

    # Split at a single 70% boundary (the original used 0.7 and 0.75, which
    # left 5% of the rows in neither set)
    train_data, test_data = df[0:int(len(df) * 0.7)], df[int(len(df) * 0.7):]
    training_data = train_data['Close'].values
    test_data = test_data['Close'].values
    history = [x for x in training_data]
    model_predictions = []
    N_test_observations = len(test_data)
    for time_point in range(N_test_observations):
        # 'arima_model' avoids shadowing the pre-trained 'model' used later;
        # the modern statsmodels ARIMA fit() takes no 'disp' argument
        arima_model = ARIMA(history, order=(4, 1, 0))
        model_fit = arima_model.fit()
        output = model_fit.forecast()
        yhat = output[0]
        model_predictions.append(yhat)
        true_test_value = test_data[time_point]
        history.append(true_test_value)
    MSE_error = mean_squared_error(test_data, model_predictions)

    linear = LinearRegression().fit(Xtrain, ytrain)
    linearPrediction = linear.predict(Xfuture)
    linearResult = linear.score(Xtrain, ytrain)

    tree = DecisionTreeRegressor().fit(Xtrain, ytrain)
    treePrediction = tree.predict(Xfuture)
    treeResult = tree.score(Xtrain, ytrain)
    predictions = treePrediction
    valid = modelData[X.shape[0]:]
    valid['Predict'] = predictions

    todaysDate = datetime.now().date()
    futureDays = 10
    us_bd = CustomBusinessDay(calendar=USFederalHolidayCalendar())
    futureDates = pd.date_range(todaysDate, periods=futureDays, freq=us_bd)
    combinedDFcol = ['Close', 'Predict', 'SM1', 'SM2', 'SM3', 'SM4']
    futureDF = pd.DataFrame(index=futureDates, columns=combinedDFcol)
    # the modern ARIMA forecast() returns the forecast array directly
    futureDF['Predict'] = model_fit.forecast(steps=futureDays)
    combinedDF = pd.concat([df, futureDF])
    combinedDF.index.names = ['Date']
    currInfo = yf.Ticker(ticker).info
    infoDict = {
        'Symbol: ': currInfo['symbol'],
        'Current Ask/Bid: ': str(currInfo['ask']) + '/' + str(currInfo['bid']),
        'Open Price: ': str(round(currInfo['open'], 2)),
        'High/Low Price: ': str(currInfo['dayHigh']) + '/' + str(currInfo['dayLow']),
        'Avg Volume: ': str(currInfo['averageVolume']),
        'Volume: ': str(currInfo['volume']),
        '52w High: ': str(round(currInfo['fiftyTwoWeekHigh'], 2)),
        '52w Low: ': str(round(currInfo['fiftyTwoWeekLow'], 2)),
        'MorningStar Rating: ': str(currInfo['morningStarOverallRating']),
        'Short Ratio: ': str(currInfo['shortRatio'])
    }

    try:
        new_df = posts[posts[selected_cols].apply(
            lambda x: x.str.contains(ticker)).all(axis=1)]
        data = new_df
        data['Ticker'] = ticker
        group_df = new_df.groupby('created_utc') \
            .agg({'id': 'count', 'num_comments': 'mean', 'score': 'mean'}) \
            .rename(columns={'id': 'post_count',
                             'num_comments': 'avg_comments',
                             'score': 'avg_score'}) \
            .reset_index()
        group_df['Ticker'] = ticker
    except Exception:
        places = {
            'created_utc': 0,
            'post_count': 0,
            'avg_comments': 0,
            'avg_score': 0,
            'ticker': ticker
        }
        # wrap in a list so the scalar values form a single row
        group_df = pd.DataFrame([places])
        # ensure 'data' exists for the filtering below even on failure
        data = pd.DataFrame(columns=['created_utc', 'Ticker', 'score'])

    super_posts_df = data.loc[data['score'] > 300]
    if not super_posts_df.empty:
        super_posts_df = super_posts_df.groupby(
            ['created_utc',
             'Ticker'])['score'].apply(lambda x: (x >= 300).sum()).reset_index(
                 name='Count_Score_300')

    try:
        pytrend = TrendReq(hl='en-US', tz=360)
        pytrend.build_payload(kw_list=[ticker],
                              timeframe=beginDate + ' ' + endDate,
                              geo='US')
        # note: 'df' is reassigned here to the Google Trends frame
        df = pytrend.interest_over_time()
        df['Noise'] = df[ticker]
        df[ticker] = ticker
        df.index.names = ['Date']
        df.columns = ['Ticker', 'isPartial', 'Noise']
        mergedNoise = df
    except Exception:
        noise = {'Ticker': ticker, 'isPartial': 'No', 'Noise': 0}
        mergedNoise = pd.DataFrame([noise])

    group_df['created_utc'] = pd.to_datetime(group_df['created_utc'])
    if not super_posts_df.empty:
        super_posts_df['created_utc'] = pd.to_datetime(
            super_posts_df['created_utc'])
    merged = group_df.merge(mergedNoise,
                            left_on=['created_utc', 'Ticker'],
                            right_on=['Date', 'Ticker'],
                            how='left')
    if not super_posts_df.empty:
        merged = merged.merge(super_posts_df,
                              left_on=['created_utc', 'Ticker'],
                              right_on=['created_utc', 'Ticker'],
                              how="left")
    else:
        merged['Count_Score_300'] = 0
    merged = merged.drop(columns=['isPartial'])

    stockData = yf.download(ticker, start=beginDate, end=endDate)
    stockData['Ticker'] = ticker
    stockReport = pd.DataFrame(stockData,
                               columns=['Ticker', 'Adj Close', 'Volume'])
    merged = merged.merge(stockReport,
                          left_on=['created_utc', 'Ticker'],
                          right_on=['Date', 'Ticker'],
                          how='left')
    merged['post_count_change'] = merged['post_count'].pct_change()
    merged['avg_score_change'] = merged['avg_score'].pct_change()
    merged['Adj Close_change'] = merged['Adj Close'].pct_change()
    merged['Volume_change'] = merged['Volume'].pct_change()
    merged.replace([np.inf, -np.inf], np.nan, inplace=True)
    merged = merged.dropna()

    expected_posts = merged['post_count'].mean()
    expected_avg_comments = merged['avg_comments'].mean()
    expected_volume_change = merged['Volume'].mean()
    expected_300_count = merged['Count_Score_300'].mean()

    data = pd.DataFrame()
    length = 0
    try:
        params = {'limit': 100, 'q': ticker, 'restrict_sr': True}
        res = requests.get("https://oauth.reddit.com/r/WallStreetBets/search",
                           headers=headers,
                           params=params)

        new_df = df_from_response(res)
        new_df.sort_values(by=['date'], inplace=True, ascending=False, axis=0)
        row = new_df.iloc[-1]
        fullname = row['kind'] + '_' + row['id']
        params['after'] = fullname
        data = pd.concat([data, new_df], ignore_index=True)
    except Exception:
        # leave 'data' as the empty frame created above
        pass

    data['date'] = pd.to_datetime(data['date'])

    super_posts_live = data.groupby('date')['score'].apply(
        lambda x: (x >= 300).sum()).reset_index(name='Count_Score_300')
    if super_posts_live.empty:
        super_posts_live['Count_Score_300'] = 0

    try:
        data['Ticker'] = ticker
        live_group = data.groupby('date') \
        .agg({'id':'count', 'num_comments':'mean', 'score':'mean'}) \
        .rename(columns={'id':'post_count','num_comments':'avg_comments', 'score':'avg_score'}) \
        .reset_index()
        live_group['Ticker'] = ticker
    except Exception:
        places = {
            'created_utc': 0,
            'post_count': 0,
            'avg_comments': 0,
            'avg_score': 0,
            'ticker': ticker
        }
        live_group = pd.DataFrame([places])

    live_group = live_group.merge(super_posts_live,
                                  left_on=['date'],
                                  right_on=['date'],
                                  how="left")
    live_group = live_group.merge(stockReport,
                                  left_on=['date', 'Ticker'],
                                  right_on=['Date', 'Ticker'],
                                  how='left')
    live_group['Adj Close_change'] = live_group['Adj Close'].pct_change()
    live_group['Volume_change'] = live_group['Volume'].pct_change()
    live_group['post_count_change'] = live_group['post_count'].pct_change()
    live_group['avg_score_change'] = live_group['avg_score'].pct_change()

    xfits = live_group[live_group.date > datetime.now() -
                       pd.to_timedelta("3day")]
    xfits_dates = xfits['date']
    xfittings = xfits[[
        'post_count_change', 'avg_score_change', 'Volume_change',
        'Count_Score_300'
    ]]

    # 'X_scaler' and 'model' are assumed to be a pre-fitted scaler and a
    # pre-trained social-media regression model defined outside this snippet
    X_fits_scaled = X_scaler.transform(xfittings)
    social_predictions = model.predict(X_fits_scaled)
    social_predictions = pd.DataFrame(social_predictions.reshape(-1, 1))
    # Synthetic placeholder features stand in for future social-media inputs
    Xnew, _ = make_regression(n_samples=10,
                              n_features=4,
                              noise=0.01,
                              random_state=1)
    ynew = model.predict(Xnew)
    future_predict_df = pd.DataFrame(ynew.reshape(-1, 1))
    live_group.sort_values(by=['date'], inplace=True, ascending=False)

    future_dates = pd.date_range(start=today, periods=10).strftime('%Y-%m-%d')
    futureDates = pd.date_range(todaysDate, periods=futureDays, freq=us_bd)

    SMPredict = pd.DataFrame(index=future_dates, columns=combinedDFcol)
    future_predict_df = pd.DataFrame(ynew.reshape(-1, 1),
                                     index=future_dates,
                                     columns=['SMPredict'])
    SMPredict = SMPredict.merge(future_predict_df,
                                left_index=True,
                                right_index=True,
                                how="left")

    xfits = xfits.drop(columns=[
        'Ticker', 'Adj Close_change', 'Volume_change', 'post_count_change',
        'avg_score_change'
    ])
    xfits['SMPredictions'] = social_predictions
    xfits = xfits.set_index('date')
    # note: 'df' is the Google Trends frame at this point, so the concat
    # below carries its 'Ticker', 'isPartial' and 'Noise' columns
    xfits = pd.concat([df, SMPredict])
    xfits = xfits.drop(columns=['Ticker', 'isPartial', 'Noise'])
    xfits.index.name = "Date"

    combinedDFcol = ['Close', 'Predict', 'SM1', 'SM2', 'SM3', 'SM4']
    futureDF = pd.DataFrame(index=futureDates, columns=combinedDFcol)
    futureDF['Predict'] = model_fit.forecast(steps=futureDays)
    combinedDF = pd.concat([df, futureDF])
    combinedDF.index.names = ['Date']

    today_dict = xfits.to_dict()
    futureSM_prediction = SMPredict.to_dict()
    carls_dict = combinedDF.to_dict()

    # return a tuple (the original returned a set literal of dicts, which
    # raises TypeError because dicts are unhashable)
    return today_dict, futureSM_prediction, carls_dict
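A minimal call sketch; the ticker and the unpacking of the three returned dicts are illustrative:

today_dict, future_sm_prediction, combined_dict = stock_info('AAPL')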
Code Example #7
File: test_penfmb.py  Project: 18279190173/python
import unittest

import pandas as pd
import numpy as np
from numpy.testing import assert_almost_equal
from pandas_datareader.data import DataReader

import penfmb as pn

# French Data library
kwds = {'data_source': 'famafrench', 'start': '1972-1', 'end': '2013-12'}
MONTHLY = 0
ff = DataReader("F-F_Research_Data_Factors", **kwds)[MONTHLY]
twentyfive = DataReader("25_Portfolios_5x5", **kwds)[MONTHLY]
mom = DataReader("F-F_Momentum_Factor", **kwds)[MONTHLY]

twentyfive = twentyfive.subtract(ff['RF'], axis=0)
carhart = pd.concat([ff.drop('RF', axis=1), mom], axis=1)

class TestMain(unittest.TestCase):

    def test_fmb(self):
        _, b, _ = pn._fmb(twentyfive, ff['Mkt-RF'])
        assert_almost_equal(b.params,
                            np.array([1.4741268, -0.6969725]), decimal=4)

        _, b, _ = pn._fmb(twentyfive, ff.drop('RF', axis=1))
        assert_almost_equal(
            b.params,
            np.array([1.3495192, -0.7947864, 0.1414998, 0.4257349]),
            decimal=4)

    def test_PenFMB(self):
        # Smoke test: fitting with a single bootstrap draw should not raise
        penfmb = pn.PenFMB(nboot=1).fit(twentyfive, carhart)
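A standard runner guard so the test file can be executed directly (not present in the original snippet):

if __name__ == '__main__':
    unittest.main()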