from tensorflow.keras.models import model_from_yaml
from performForecastClassFile import performForecast
import altair as alt

# Rebuild the LSTM architecture from its YAML description.  The context
# manager closes the file even if read() raises.
# NOTE(review): newer TensorFlow releases dropped YAML model support --
# confirm the pinned TensorFlow version still provides model_from_yaml.
with open('model/rel_model.yaml', 'r') as yaml_file:
    loaded_model_yaml = yaml_file.read()
lstm = model_from_yaml(loaded_model_yaml)

# Load the trained weights into the freshly built model.
lstm.load_weights("model/rel_model.h5")
print("Loaded model from disk")
print('Model loaded')

st.subheader('Reliance Stock Forecast')

# Download the price history and expose the date index as a 'Date' column
# so the chart can encode it.
df = pdr.DataReader('RELIANCE.BO', 'yahoo')
df = df.reset_index()

# Number of most recent rows to draw in the candlestick chart.
cs_slide = st.slider('Number of Days', 1, df.shape[0]+1, 30)

# Shared encoding: time on x, green when the close is at or above the open,
# red otherwise.
base = alt.Chart(df.tail(cs_slide)).encode(
    alt.X('Date:T', axis=alt.Axis(labelAngle=-45)),
    color=alt.condition("datum.Open <= datum.Close",
                        alt.value("#06982d"),
                        alt.value("#ae1325"))
)

# Candlestick = thin low/high rule layered under an open/close bar.
chart = alt.layer(
    base.mark_rule().encode(
        alt.Y('Low:Q', title='Price', scale=alt.Scale(zero=False)),
        alt.Y2('High:Q')),
    base.mark_bar().encode(alt.Y('Open:Q'), alt.Y2('Close:Q')),
).interactive()
def prediccion_acciones(company):
    """Train a small LSTM on a stock's daily highs and predict its 2020 highs.

    Daily prices for ``company`` are downloaded from Yahoo for 2012-2020.
    Rows up to and including 2019 form the training set; rows from 2020
    onward form the validation set.  Only the daily ``High`` price is used.

    Parameters
    ----------
    company : str
        Ticker symbol passed to ``web.DataReader``.

    Returns
    -------
    numpy.ndarray
        Predictions in price units, one row per validation day after the
        first ``time_step`` (60) validation days, shape ``(n, 1)``.
    """
    start = dt.datetime(2012, 1, 1)
    end = dt.datetime(2020, 12, 31)
    # 'yahoo' selects the Yahoo Finance reader.
    dataset = web.DataReader(company, 'yahoo', start, end)

    #
    # Training and validation sets
    # The LSTM is trained on data up to 2019 and validated on 2020 data.
    # In both cases only the day's highest price is used.  The column is
    # selected by name: a positional slice depends on the reader's column
    # order and does not reliably land on 'High'.
    #
    set_entrenamiento = dataset[:'2019'][['High']]
    set_validacion = dataset['2020':][['High']]

    # Scale the training set to [0, 1]; the same scaler is reused below.
    sc = MinMaxScaler(feature_range=(0, 1))
    set_entrenamiento_escalado = sc.fit_transform(set_entrenamiento)

    # The network receives "time_step" consecutive values and outputs one
    # value (the prediction that follows them).
    time_step = 60
    X_train = []
    Y_train = []
    m = len(set_entrenamiento_escalado)

    for i in range(time_step, m):
        # X: sliding windows of "time_step" values.
        X_train.append(set_entrenamiento_escalado[i - time_step:i, 0])
        # Y: the value right after the window.
        Y_train.append(set_entrenamiento_escalado[i, 0])
    X_train, Y_train = np.array(X_train), np.array(Y_train)

    # Keras LSTM layers expect (samples, time steps, features).
    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))

    #
    # LSTM network
    #
    dim_entrada = (X_train.shape[1], 1)
    dim_salida = 1
    na = 50  # number of LSTM units

    modelo = Sequential()
    modelo.add(LSTM(units=na, input_shape=dim_entrada))
    modelo.add(Dense(units=dim_salida))
    modelo.compile(optimizer='rmsprop', loss='mse')
    modelo.fit(X_train, Y_train, epochs=10, batch_size=32)

    #
    # Validation (prediction of the stock value)
    #
    x_test = set_validacion.values
    x_test = sc.transform(x_test)

    X_test = []
    for i in range(time_step, len(x_test)):
        X_test.append(x_test[i - time_step:i, 0])
    X_test = np.array(X_test)
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    prediccion = modelo.predict(X_test)
    # Undo the [0, 1] scaling so the result is in price units.
    prediccion = sc.inverse_transform(prediccion)

    return prediccion
# tickers: ticker symbols of healthcare companies.
# sets: one price DataFrame per ticker, in the same order as `tickers`.
tickers = ['MDT', 'JNJ', 'GE', 'ABT', 'PHIA.AS', 'BDX', 'CAH',
           'SIEGY', 'SYK', 'BAX', 'BSX', 'DHR', 'EL', 'ZBH', 'ALC',
           'FMS', 'OCPNY', 'TRUMY', 'SNN', 'XRAY', 'EW', 'ISRG', 'HOCPY',
           'HOLX', 'VAR', 'HRC', 'SOON.SW']
sets = []
data = []
five_days_later = []
five_days_before = []

# Flag each day with 1 when the NEXT row's AdjClose is higher than that
# day's AdjClose, otherwise 0 (shift(-1) pulls the following row's price).
for ticker in tickers:
    frame = web.DataReader(ticker, data_source='yahoo',
                           start='1997-01-01', end='today')
    frame = frame.rename(columns={'Adj Close': 'AdjClose'})

    next_price = frame['AdjClose'].shift(-1)
    daily_return = (next_price - frame['AdjClose']) / frame['AdjClose']

    # Keep only the price and the flag.  The comparison is vectorized; NaN
    # returns (the last row) compare False and become 0.
    frame = frame[['AdjClose']].copy()
    frame[ticker + '_Up(1)/Down(0)'] = (daily_return > 0).astype(int)
    sets.append(frame)

# Counter kept for backward compatibility with code that reads `a`.
a = len(sets)

data = pd.concat(sets, join='outer', axis=1)
# Mask non-positive cells (including 0 flags) to NaN.
data = data[data > 0]

# Keep only the dates on which every ticker's flag is 1.
for ticker in tickers:
    data = data[data[ticker + '_Up(1)/Down(0)'] == 1]
import os
import pandas_datareader as web
import pandas as pd
import datetime
from EMA import *

start = datetime.datetime(2018, 1, 1)
end = datetime.datetime(2020, 1, 1)

# SECURITY: credentials should not live in source control.  Prefer the
# ALPHAVANTAGE_API_KEY environment variable; the literal is kept only as a
# backward-compatible fallback and should be rotated and removed.
API_KEY = os.environ.get('ALPHAVANTAGE_API_KEY', '8YUCTMX2NCJUI70T')

ticker = 'AAPL'
test = web.DataReader(ticker, 'av-daily-adjusted', start, end,
                      api_key=API_KEY)

# Persist the download; getEMA is given the CSV file name, not the frame.
csv_name = ticker + '.csv'
test.to_csv(csv_name)

emalist12d = getEMA(csv_name, 12)
emalist26d = getEMA(csv_name, 26)
signallist = getEMA(csv_name, 9)

print(len(emalist12d))
print(len(emalist26d))
print(len(signallist))
# Notebook shell step (not valid Python in a .py file), run once beforehand:
#     pip install mpl_finance

# In[7]:
import datetime as dt
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas_datareader as web
from mpl_finance import candlestick_ohlc

start = dt.datetime(2020, 1, 1)
end = dt.datetime.now()

df = web.DataReader('AAPL', 'yahoo', start, end)
print(df.head())

# candlestick_ohlc is fed rows of (date number, open, high, low, close).
# reset_index() returns a new frame, so the column assignment below does
# not write into a slice of the downloaded data.
df = df[['Open', 'High', 'Low', 'Close']].reset_index()
df['Date'] = df['Date'].map(mdates.date2num)

ax = plt.subplot()
candlestick_ohlc(ax, df.values, width=8, colorup='g', colordown='r')
ax.xaxis_date()
ax.grid(True)
plt.show()
import numpy as np
import pandas as pd
import random
random.seed(42)
# The plots below draw from NumPy's generator, which random.seed() does not
# touch; seed it too so the figures are reproducible.
np.random.seed(42)
import matplotlib.pyplot as plt
import pandas_datareader as web
import seaborn as sns
sns.set()

########## Time Series Example
dataset = pd.DataFrame(
    web.DataReader("SWED-A.ST", "yahoo", start="2010")['Adj Close'])
plt.plot(dataset['Adj Close'], label='SWED-A.ST', color='k', linewidth=1)
plt.legend()
plt.show()

########## Homoscedastic
# Noise with the same spread at every point.
rand = np.random.rand(100) * 5
y = np.arange(100)
x = y + rand
plt.plot(y, x, 'o', color='k', ms=3)
plt.show()

########## Heteroscedastic
# Noise whose spread grows with y.  y is defined before it is used so this
# section no longer depends on the previous one having run.
y = np.arange(100)
rand = (np.random.rand(100) / 1.5) * y
x = y + rand
plt.plot(y, x, 'o', color='k', ms=3)
plt.show()

########## Rolling Variance of Swedbank
import numpy as np
import matplotlib.pyplot as plt
import pandas_datareader as web
import datetime as dt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Dense, Dropout, LSTM
from tensorflow.keras.models import Sequential

# Instrument and download window.
cryptocurrency = 'BTC'
currency = 'USD'
start = dt.datetime(2016, 1, 1)
end = dt.datetime.now()

data = web.DataReader(f'{cryptocurrency}-{currency}', 'yahoo', start, end)

# Scale the closing prices to [0, 1] as a single-feature column.
scaler = MinMaxScaler(feature_range=(0, 1))
close_column = data['Close'].values.reshape(-1, 1)
scaled_data = scaler.fit_transform(close_column)

# Each sample is a window of `prediction_days` scaled closes; its target is
# the scaled close that immediately follows the window.
prediction_days = 60
window_ends = range(prediction_days, len(scaled_data))
x_train = np.array(
    [scaled_data[stop - prediction_days:stop, 0] for stop in window_ends])
y_train = np.array([scaled_data[stop, 0] for stop in window_ends])

# Add the trailing feature axis: (samples, time steps, 1).
x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
# Build one boolean mask per recession window and OR them together, so the
# result is True for every date that falls in any window.
mask = [date_list.isin(x) for x in recessions]
mask = reduce(operator.or_, mask)

# Recession indicator series: 0 everywhere, 1 inside the masked dates.
data = pd.Categorical(np.zeros(len(date_list)), dtype="category",
                      categories=[0, 1])
data = pd.Series(data, index=date_list)
data.name = 'Recession'
data[mask] = 1
data = pd.to_numeric(data)

# Pull the rate series from FRED.
ten_yr = pdr.DataReader('DGS10', 'fred', start_date, end_date)
bills = pdr.DataReader('DTB3', 'fred', start_date, end_date)
ffunds = pdr.DataReader('DFF', 'fred', start_date, end_date)

# Align the three series on one index and carry the last observation
# forward.  .ffill() replaces the deprecated fillna(method='ffill').
rates = pd.concat([ffunds, bills, ten_yr], axis=1).ffill()
rates = rates / 100

# Add the curve: 10-year yield minus 3-month bill rate.
rates['Curve'] = rates['DGS10'] - rates['DTB3']

# Derive one '<column>_<i>' column per base column and per entry of `days`.
# The base columns are snapshotted first so the new columns are not
# themselves processed.
base_columns = list(rates.columns)
for z in base_columns:
    for i in days:
        rates[f'{z}_{i}'] = create_diff(rates[z], i)
import pandas as pd import matplotlib.pyplot as plt import pandas_datareader as web import math import numpy as np from sklearn.preprocessing import MinMaxScaler from statsmodels.tsa.stattools import adfuller, acf, pacf from statsmodels.tsa.arima_model import ARIMA from sklearn.metrics import mean_squared_error import warnings warnings.filterwarnings("ignore") from pmdarima.arima import auto_arima df = web.DataReader('AMZN', data_source='yahoo', start='2012-01-01', end='2021-04-26') print(df.head()) print(df.shape) plt.figure(figsize=(12, 6)) plt.title('closing price') plt.plot(df['Close']) plt.xlabel('Date', fontsize=18) plt.ylabel('close price', fontsize=18) plt.show() print(df.info()) df_close = df['Close'] print(df_close.head()) def test_stationarity(timeseries):
# `np` is used at the end of this section; import it here so the section
# does not depend on an import that is not visible nearby.
import numpy as np
import pandas_datareader as web
import datetime as dt
import pickle
import copy
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM

# Load Data
company = 'XPEV'
start = dt.datetime(2000, 1, 1)
end = dt.datetime(2020, 12, 20)
data = web.DataReader(company, 'yahoo', start, end)
print(data)

# Prepare Data: scale the closing prices to [0, 1] as one feature column.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler_data = scaler.fit_transform(data['Close'].values.reshape(-1, 1))

# Each sample is a window of `prediction_days` scaled closes; the target is
# the scaled close that follows the window.
prediction_days = 60
x_train = []
y_train = []

for x in range(prediction_days, len(scaler_data)):
    x_train.append(scaler_data[x - prediction_days:x, 0])
    y_train.append(scaler_data[x, 0])

x_train, y_train = np.array(x_train), np.array(y_train)
pdr1.py This script should use pandas-datareader to get prices from google.com. Demo: conda install pandas-datareader ~/anaconda3/bin/python pdr1.py """ # pandas_datareader depends on shell command: # conda install pandas-datareader import pandas_datareader as pdr import datetime import pdb # I should stop the script here and start the debugger: pdb.set_trace() start_dt = datetime.datetime(2016, 1, 1) end_dt = datetime.datetime(2026, 12, 31) prices_df = pdr.DataReader('IBM', 'yahoo', start_dt, end_dt) print(prices_df.head()) # I should write the prices to a CSV file in /tmp: prices_df.to_csv('/tmp/prices_ibm.csv', float_format='%4.3f') # I should end the script with a simple string which might be a # convenient breakpoint for the debugger: 'bye'
# NOTE(review): the nesting of this excerpt was reconstructed; only the
# drive-letter override is assumed to sit under the host check -- confirm.
if "RGEK-LAPTOP-" in _host:
    _drive_letter = 'D'

_root_dir = _drive_letter + ":" + root_dir
_output_dir = _root_dir + output_dir + output_sub_dir
# exist_ok avoids the check-then-create race of a separate exists() test.
os.makedirs(_output_dir, exist_ok=True)

# One price frame per ticker.
symbols = []

# Pull prices from the configured source for each symbol in `tickers`.
# A failing ticker is reported and skipped (best effort).
for ticker in tickers:
    try:
        r = web.DataReader(name=ticker, data_source=api_host, start=start)
        r['Symbol'] = ticker  # add a symbol column
        symbols.append(r)
    except Exception as ex:
        print(ex)

# pd.concat raises an opaque ValueError on an empty list; fail with a
# message that names the actual problem instead.
if not symbols:
    raise ValueError("no price data could be downloaded for any ticker")

# Concatenate into one long frame and keep the columns needed for the pivot.
df = pd.concat(symbols)
df = df.reset_index()
df = df[['Date', 'Close', 'Symbol']]
print("df.head() :\n{}".format(df.head()))

# One row per date, one column per symbol.  Keyword arguments are required
# by current pandas releases.
df_pivot = df.pivot(index='Date', columns='Symbol',
                    values='Close').reset_index()
print("df_pivot :\n{}".format(df_pivot))
@author: francescatenan """ import pandas as pd import numpy as np import matplotlib.pyplot as plt import pandas_datareader as web # Assets to be included in the portfolio tickers = ['AAPL', 'AMZN', 'MSFT', 'WMT'] # Asset weights wts = [0.296223, 0.442899, 0.812707, 0.439760] price_data = web.DataReader(tickers, data_source='yahoo', start='01/01/2018', end='01/01/2020')['Adj Close'] ret_data = price_data.pct_change()[1:] weighted_returns = (wts * ret_data) #print(weighted_returns.head()) port_ret = weighted_returns.sum(axis=1) # axis =1 tells pandas we want to add # the rows #print(port_ret) weight_sharpe_port = (5 - 1) / (17 - 1) #print('weight of sharpe portfolio is', weight_sharpe_port)
import pandas as pd
import numpy as np
import datetime as dt
from datetime import datetime
import matplotlib.pyplot as plt
import pyEX as p
import pandas_datareader as web
from matplotlib import style
from nsetools import nse

# Reference universe, currently unused:
# stocks = ['ADANIPORTS.NS', 'ASIANPAINT.NS', 'AXISBANK.NS', 'BAJAJ-AUTO.NS',
#           'BAJFINANCE.NS', 'BAJAJFINSV.NS', 'BHARTIARTL.NS', 'BPCL.NS',
#           'BRITANNIA.NS', 'CIPLA.NS', 'COALINDIA.NS', 'DIVISLAB.NS',
#           'DRREDDY.NS', 'EICHERMOT.NS', 'GAIL.NS', 'GRASIM.NS', 'HDFC.NS',
#           'HDFCBANK.NS', 'HDFCLIFE.NS', 'HEROMOTOCO.NS', 'HINDALCO.NS',
#           'HINDUNILVR.NS', 'ICICIBANK.NS', 'INDUSINDBK.NS', 'INFY.NS',
#           'IOC.NS', 'IRCON.NS', 'ITC.NS', 'JSWSTEEL.NS', 'KOTAKBANK.NS',
#           'LT.NS', 'M&M.NS', 'MARUTI.NS', 'NESTLEIND.NS', 'NTPC.NS',
#           'ONGC.NS', 'POWERGRID.NS', 'RELIANCE.NS', 'SBILIFE.NS', 'SBIN.NS',
#           'SHREECEM.NS', 'SUNPHARMA.NS', 'TATAMOTORS.NS', 'TATASTEEL.NS',
#           'TCS.NS', 'TECHM.NS', 'TITAN.NS', 'ULTRACEMCO.NS', 'UPL.NS',
#           'WIPRO.NS']
stocksIown = ['WIPRO.NS', 'LUXIND.NS', 'DFMFOODS.NS', 'SUBROS.NS']

style.use('ggplot')

# Single-ticker variant, kept for reference:
# start = dt.datetime(2019, 1, 1)
# end = dt.datetime(2020, 12, 30)
# df = web.DataReader('PSPPROJECT.NS', 'yahoo', start, end)
# df.to_csv('PSPPROJECT.NS')

# The download window is the same for every ticker.
start = dt.datetime(2019, 1, 1)
end = dt.datetime(2021, 1, 6)

# Download each owned stock and write it to a file named after the ticker.
for stock in stocksIown:
    df = web.DataReader(stock, 'yahoo', start, end)
    df.to_csv(stock)
    print(f"{stock} done")
def load_rf(freq='M'):
    """Build a risk-free rate return series using 3-month US T-bill yields.

    The 3-Month Treasury Bill: Secondary Market Rate from the Federal
    Reserve (a yield) is converted to a total return.  See 'Methodology'
    for details.

    The time series should closely mimic returns of the BofA Merrill Lynch
    US Treasury Bill (3M) (Local Total Return) index.

    Parameters
    ----------
    freq : str, sequence, or set
        Frequency code(s) drawn from 'DWMQA', case-insensitive.
        If a single-character string, return one return series resampled
        to that frequency.  If a multi-character string, sequence, or set,
        return a dict of such series keyed by the upper-cased code.

    Returns
    -------
    pandas object or dict
        Compounded period returns; a dict of them when several
        frequencies are requested.

    Raises
    ------
    ValueError
        If `freq` is empty or contains a code outside 'DWMQA'.

    Methodology
    -----------
    The Federal Reserve publishes a daily chart of Selected Interest Rates
    (release H.15; www.federalreserve.gov/releases/h15/).  As with a yield
    curve, some yields are interpolated from recent issues because
    Treasury auctions do not occur daily.

    While the de-annualized ex-ante yield itself is a fairly good tracker
    of the day's total return, it is not perfect and can exhibit
    non-negligible error in periods of volatile short rates.  The purpose
    of this function is to convert yields to total returns for 3-month
    T-bills.  It is a straightforward process given that these are
    discount (zero-coupon) securities.  It consists of buying a 3-month
    bond at the beginning of each month, then amortizing that bond
    throughout the month to back into the price of a <3-month tenor bond.

    The source data (pulled from fred.stlouisfed.org) is quoted on a
    discount basis.  (See footnote 4 from release H.15.)  This is
    converted to a bond-equivalent yield (BEY) and then translated to a
    hypothetical daily total return.

    The process largely follows Morningstar's published Return Calculation
    of U.S. Treasury Constant Maturity Indices, and is as follows:

    - At the beginning of each month a bill is purchased at the prior
      month-end price, and daily returns in the month reflect the change
      in daily valuation of this bill.
    - If t is not a business day, its yield is the yield of the prior
      business day.
    - At each day during the month, the price of a 3-month bill purchased
      on the final calendar day of the previous month is computed.
    - Month-end pricing is unique.  At each month-end date, there are
      effectively two bonds and two prices.  The first is the bond
      hypothetically purchased on the final day of the prior month with
      2m remaining to maturity, and the second is a new-issue bond
      purchased that day with 3m to maturity.  The former is used as the
      numerator to compute that day's total return, while the latter is
      used as the denominator to compute the next day's (1st day of next
      month) total return.

    Description of the BofA Merrill Lynch US 3-Month Treasury Bill Index:
    The BofA Merrill Lynch US 3-Month Treasury Bill Index is comprised of
    a single issue purchased at the beginning of the month and held for a
    full month.  At the end of the month that issue is sold and rolled
    into a newly selected issue.  The issue selected at each month-end
    rebalancing is the outstanding Treasury Bill that matures closest to,
    but not beyond, three months from the rebalancing date.  To qualify
    for selection, an issue must have settled on or before the month-end
    rebalancing date.
    (Source: Bank of America Merrill Lynch)

    See also
    --------
    FRED: 3-Month Treasury Bill: Secondary Market Rate (DTB3)
      https://fred.stlouisfed.org/series/DTB3
    McGraw-Hill/Irwin, Interest Rates, 2008.
      https://people.ucsc.edu/~lbaum/econ80h/LS-Chap009.pdf
    Morningstar, Return Calculation of U.S. Treasury Constant Maturity
      Indices, September 2008.
    """
    freqs = 'DWMQA'

    # Normalize the request to a list of upper-cased codes.  Validation
    # uses per-code set membership: a plain substring test would accept
    # '' and multi-letter runs such as 'DW'.
    single = isinstance(freq, str) and len(freq) == 1
    requested = [f.upper() for f in freq]
    valid = set(freqs)
    if not requested or any(f not in valid for f in requested):
        raise ValueError('`freq` must be either a single element or subset'
                         ' from %s, case-insensitive' % freqs)

    # Load daily 3-Month Treasury Bill: Secondary Market Rate.
    # Note that this is on discount basis and will be converted to BEY.
    # Periodicity is daily.
    rates = (pdr.DataReader('DTB3', 'fred', DSTART)
             .mul(0.01)
             .asfreq('D', method='ffill')
             .ffill()
             .squeeze())

    # Algebra doesn't 'work' on DateOffsets, don't simplify here!
    minus_one_month = offsets.MonthEnd(-1)
    plus_three_months = offsets.MonthEnd(3)
    trigger = rates.index.is_month_end

    # Days to maturity of the bill bought at the prior month-end ("old")
    # and, on month-end dates only, of the bill bought that day ("new").
    dtm_old = rates.index + minus_one_month + plus_three_months - rates.index
    dtm_new = rates.index.where(trigger, rates.index + minus_one_month) \
        + plus_three_months - rates.index

    # This does 2 things in one step:
    # (1) convert discount yield to BEY
    # (2) get the price at that BEY and days to maturity
    # The two equations are simplified
    # See https://people.ucsc.edu/~lbaum/econ80h/LS-Chap009.pdf
    p_old = (100 / 360) * (360 - rates * dtm_old.days)
    p_new = (100 / 360) * (360 - rates * dtm_new.days)

    # Month-end dates use the old bill's price change; all other dates use
    # the new-bill series.
    res = p_old.pct_change().where(trigger, p_new.pct_change()).dropna()

    # TODO: For purpose of using in TSeries, we should drop upsampled
    #       periods where we don't have the full period constituents.
    growth = res.add(1.)
    if single:
        return growth.resample(requested[0]).prod().sub(1.)
    return {f: growth.resample(f).prod().sub(1.) for f in requested}
import datetime as dt
import pandas_datareader as web
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from mpl_finance import candlestick_ohlc

# Time frame (fixed end date; use dt.datetime.now() for "up to today").
start = dt.datetime(2020, 1, 1)
end = dt.datetime(2020, 7, 1)

# Download the price history.
data = web.DataReader('UVXY', 'yahoo', start, end)

# Keep the OHLC columns and move the date index into a 'Date' column.
ohlc_columns = ['Open', 'High', 'Low', 'Close']
data = data[ohlc_columns].reset_index()

# Dates become Matplotlib date numbers.
data['Date'] = data['Date'].map(mdates.date2num)

# Plot area with a grid.
ax = plt.subplot()
ax.grid(True)
# Make sure the grid is below the data points
def _monthly_pct_change(levels):
    """Return month-over-month percent changes (x100) of a level series.

    The series is forward-filled onto a calendar-daily grid so that every
    month-end has a value, sampled at month-ends, then differenced.
    """
    return (levels.asfreq('D', method='ffill')
                  .ffill()
                  .asfreq('M')
                  .pct_change() * 100.)


def _period_to_month_end(obj):
    """Convert a period-indexed object to midnight period-end timestamps.

    Recent pandas versions return the last nanosecond of the period from
    ``to_timestamp(how='end')``; normalizing keeps these rows aligned with
    the midnight month-end indexes of the other sources.
    """
    out = obj.to_timestamp(how='end')
    out.index = out.index.normalize()
    return out


def load_factors():
    """Load risk factor returns.

    Returns
    -------
    pandas.DataFrame
        One column per factor, rounded to 2 decimals and truncated at the
        last date for which RF is available.

    Factors
    -------
    Symbol      Description                                     Source
    ------      ----------                                      ------
    MKT         Market excess return                            French
    SMB         Size (small minus big)                          French
    HML         Value (high minus low)                          French
    RMW         Profitability (robust minus weak)               French
    CMA         Investment (conservative minus aggressive)      French
    RF          Risk-free rate                                  French
    UMD         Momentum (up minus down)                        French
    STR         Short-term reversal                             French
    LTR         Long-term reversal                              French
    BETA        Beta                                            French
    ACC         Accruals                                        French
    VAR         Variance                                        French
    IVAR        Residual variance                               French
    E-P         Earnings-to-price                               French
    CF-P        Cash flow-to-price                              French
    D-P         Dividend-to-price                               French
    BAB         Betting against beta                            AQR
    QMJ         Quality minus junk                              AQR
    HMLD        Value (high minus low) [modified version]       AQR
    LIQ         Liquidity                                       Pastor
    BDLB        Bond lookback straddle                          Hsieh
    FXLB        Currency lookback straddle                      Hsieh
    CMLB        Commodity lookback straddle                     Hsieh
    IRLB        Interest rate lookback straddle                 Hsieh
    STLB        Stock lookback straddle                         Hsieh
    USD         Percent change of FRED series DTWEXB            Fed
    HY          Change in FRED series BAMLH0A0HYM2, less RF     Fed
    PUT         CBOE S&P 500 PutWrite Index, less RF            CBOE
    BXM         CBOE S&P 500 BuyWrite Index, less RF            CBOE
    RXM         CBOE S&P 500 Risk Reversal Index, less RF       CBOE

    Source Directory
    ----------------
    Source      Link
    ------      ----
    French      http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html  # noqa
    Pastor      http://faculty.chicagobooth.edu/lubos.pastor/research/liq_data_1962_2016.txt  # noqa
    AQR         https://www.aqr.com/library/data-sets
    Hsieh       https://faculty.fuqua.duke.edu/~dah7/HFData.htm
    Fed         https://fred.stlouisfed.org/
    CBOE        http://www.cboe.com/products/strategy-benchmark-indexes
    """

    # TODO: factors eligible for addition
    #   VIIX, VIIZ, XIV, ZIV, CRP (AQR)
    #   http://www.cboe.com/micro/buywrite/monthendpricehistory.xls ends 2016
    #   could use:
    #   http://www.cboe.com/publish/scheduledtask/mktdata/datahouse/putdailyprice.csv

    # Warning: slow, kludgy data retrieval follows
    # ------------------------------------------------------------------------

    # `tgt` will become a list of DataFrames and eventually concatenated
    tgt = []

    # MKT, SMB, HML, RMW, CMA, RF, UMD, STR, LTR
    facs = [
        'F-F_Research_Data_5_Factors_2x3',
        'F-F_Momentum_Factor',
        'F-F_ST_Reversal_Factor',
        'F-F_LT_Reversal_Factor'
        ]

    for fac in facs:
        tgt.append(pdr.DataReader(fac, 'famafrench', DSTART)[0])

    # BETA, ACC, VAR, IVAR require some manipulation to compute returns
    # in the dual-sort method of Fama-French: average of the first column
    # of each group of five minus average of the last column of each group.
    for i in ['BETA', 'AC', 'VAR', 'RESVAR']:
        ser = pdr.DataReader('25_Portfolios_ME_' + i + '_5x5',
                             'famafrench', DSTART)[0]
        ser = ser.iloc[:, [0, 5, 10, 15, 20]].mean(axis=1)\
            - ser.iloc[:, [4, 9, 14, 19, 24]].mean(axis=1)
        ser = ser.rename(i)
        tgt.append(ser)

    # E/P, CF/P, D/P (univariate sorts, quintile spreads)
    for i in ['E-P', 'CF-P', 'D-P']:
        ser = pdr.DataReader('Portfolios_Formed_on_' + i,
                             'famafrench', DSTART)[0]
        ser = ser.loc[:, 'Hi 20'] - ser.loc[:, 'Lo 20']
        ser = ser.rename(i)
        tgt.append(ser)

    # Everything collected so far is period-indexed; move it onto
    # month-end timestamps so it lines up with the sources below.
    tgt = [_period_to_month_end(df) for df in tgt]

    # BAB, QMJ, HMLD
    # TODO: performance is poor here, runtime is eaten up by these 3
    links = {'BAB': 'http://bit.ly/2hWyaG8',
             'QMJ': 'http://bit.ly/2hUBSgF',
             'HMLD': 'http://bit.ly/2hdVb7G'}
    for key, value in links.items():
        ser = pd.read_excel(value, header=18, index_col=0)['USA'] * 100
        ser = ser.rename(key)
        tgt.append(ser)

    # Lookback straddles
    link = 'http://faculty.fuqua.duke.edu/~dah7/DataLibrary/TF-Fac.xls'
    straddles = pd.read_excel(link, header=14, index_col=0)
    # The index is turned into a date by appending '01', then rolled to
    # the month-end.
    straddles.index = pd.DatetimeIndex(straddles.index.astype(str) + '01') \
        + offsets.MonthEnd(1)
    straddles = straddles * 100.
    tgt.append(straddles)

    # LIQ
    link = 'http://bit.ly/2pn2oBK'
    # sep=r'\s+' is the supported spelling of the deprecated
    # delim_whitespace=True.
    liq = pd.read_csv(link, skiprows=14, sep=r'\s+', header=None,
                      usecols=[0, 3], index_col=0, names=['date', 'LIQ'])
    liq.index = pd.DatetimeIndex(liq.index.astype(str) + '01') \
        + offsets.MonthEnd(1)
    # -99 is replaced by NaN before scaling.
    liq = liq.replace(-99, np.nan) * 100.
    tgt.append(liq)

    # USD, HY
    fred = pdr.DataReader(['DTWEXB', 'BAMLH0A0HYM2'], 'fred', DSTART)
    fred = (fred.asfreq('D', method='ffill')
            .ffill()
            .asfreq('M'))
    fred.loc[:, 'DTWEXB'] = fred['DTWEXB'].pct_change() * 100.
    fred.loc[:, 'BAMLH0A0HYM2'] = fred['BAMLH0A0HYM2'].diff()
    tgt.append(fred)

    # PUT, BXM, RXM (CBOE options strategy indices)
    link1 = 'http://www.cboe.com/micro/put/put_86-06.xls'
    link2 = 'http://www.cboe.com/publish/scheduledtask/mktdata/datahouse/putdailyprice.csv'  # noqa

    put1 = pd.read_excel(link1, index_col=0, skiprows=6, header=None)\
        .rename_axis('DATE')
    put2 = pd.read_csv(link2, index_col=0, parse_dates=True, skiprows=7,
                       header=None).rename_axis('DATE')
    put = _monthly_pct_change(
        pd.concat((put1, put2)).rename(columns={1: 'PUT'}).iloc[:, 0])
    tgt.append(put)

    link1 = 'http://www.cboe.com/publish/scheduledtask/mktdata/datahouse/bxmarchive.csv'  # noqa
    link2 = 'http://www.cboe.com/publish/scheduledtask/mktdata/datahouse/bxmcurrent.csv'  # noqa

    bxm1 = pd.read_csv(link1, index_col=0, parse_dates=True, skiprows=5,
                       header=None).rename_axis('DATE')
    bxm2 = pd.read_csv(link2, index_col=0, parse_dates=True, skiprows=4,
                       header=None).rename_axis('DATE')
    bxm = _monthly_pct_change(
        pd.concat((bxm1, bxm2)).rename(columns={1: 'BXM'}).iloc[:, 0])
    tgt.append(bxm)

    link = 'http://www.cboe.com/publish/scheduledtask/mktdata/datahouse/rxm_historical.csv'  # noqa
    rxm = _monthly_pct_change(
        pd.read_csv(link, index_col=0, parse_dates=True, skiprows=2,
                    header=None)
        .rename(columns={1: 'RXM'})
        .rename_axis('DATE')
        .iloc[:, 0])
    tgt.append(rxm)

    # Clean up data retrieved above
    # -----------------------------------------------------------------

    factors = pd.concat(tgt, axis=1).round(2)
    newnames = {
        'Mkt-RF': 'MKT',
        'Mom': 'UMD',
        'ST_Rev': 'STR',
        'LT_Rev': 'LTR',
        'RESVAR': 'IVAR',
        'AC': 'ACC',
        'PTFSBD': 'BDLB',
        'PTFSFX': 'FXLB',
        'PTFSCOM': 'CMLB',
        'PTFSIR': 'IRLB',
        'PTFSSTK': 'STLB',
        'DTWEXB': 'USD',
        'BAMLH0A0HYM2': 'HY'
        }

    def _clean_name(col):
        # Strip padding before the lookup so the rename does not depend on
        # the exact whitespace in a source column name (e.g. 'Mom').
        name = col.strip() if isinstance(col, str) else col
        return newnames.get(name, name)

    factors.rename(columns=_clean_name, inplace=True)

    # Get last valid RF date; returns will be constrained to this date
    factors = factors[:factors['RF'].last_valid_index()]

    # Subtract RF for long-only factors
    subtract = ['HY', 'PUT', 'BXM', 'RXM']

    for i in subtract:
        factors.loc[:, i] = factors[i] - factors['RF']

    return factors
import pandas_datareader as web

# Download the BTC/USD history from Yahoo with the reader's default date
# range and write it, header included, to a fresh CSV file.
SYMBOL = 'BTCUSD=X'
OUTPUT_CSV = 'btc_usd.csv'

df = web.DataReader(SYMBOL, 'yahoo')
df.to_csv(OUTPUT_CSV, mode='w', header=True)
import math import pandas_datareader as web import numpy as np import pandas as pd from sklearn.preprocessing import MinMaxScaler from keras.models import Sequential from keras.layers import Dense, LSTM import matplotlib.pyplot as plt plt.style.use('fivethirtyeight') df = web.DataReader('AAPL', data_source='yahoo', start='2012-01-01', end='2020-12-17') plt.figure(figsize=(16,8)) plt.title('Close Price History') plt.plot(df['Close']) plt.xlabel('Date',fontsize=18) plt.ylabel('Close Price USD ($)',fontsize=18) plt.show() data = df.filter(['Close']) dataset = data.values training_data_len = math.ceil( len(dataset) *.8) scaler = MinMaxScaler(feature_range=(0, 1)) scaled_data = scaler.fit_transform(dataset) train_data = scaled_data[0:training_data_len , : ] x_train=[] y_train = [] for i in range(60,len(train_data)):
import pandas as pd
import pandas_datareader as web
import matplotlib.pyplot as plt
import numpy as np

ticker = 'TSLA'
start = '2019-01-18'
end = '2020-01-18'

# Download the history and discard any row with a missing value.
stock = web.DataReader(ticker, 'yahoo', start, end).dropna(how='any')
stock.head()

# Price, daily return and 20-day moving average.
adj_close = stock['Adj Close']
adj_close.plot(grid=True)

stock['ret'] = adj_close.pct_change()
stock['ret'].plot(grid=True)

stock['20d'] = adj_close.rolling(window=20, center=False).mean()
stock['20d'].plot(grid=True)

# Time period number t = 1..n for each row.
n = len(stock)
stock['t'] = range(1, n + 1)

# Helper columns for the least-squares trend line: t squared and t * price.
stock['sqr t'] = stock['t']**2
stock['tXD'] = stock['t'] * stock['Adj Close']

# Slope of the least-squares line of price against t.
sum_t = stock['t'].sum()
sum_price = stock['Adj Close'].sum()
sum_t_price = stock['tXD'].sum()
sum_t_squared = stock['sqr t'].sum()
slope = (n * sum_t_price - sum_t * sum_price) / (n * sum_t_squared - sum_t**2)
# Libraries
import math
import pandas_datareader as web
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM
import matplotlib.pyplot as plt

# Download the stock quote history.
df = web.DataReader('ONGC.NS', data_source='yahoo',
                    start='2012-01-01', end='2019-12-17')

# Plot the closing price history.
plt.figure(figsize=(16, 8))
plt.title('Close Price History')
plt.plot(df['Close'])
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price Rupees (Rs)', fontsize=18)
plt.show()

# Single-column frame holding only 'Close', and its NumPy view.
data = df.filter(['Close'])
dataset = data.values

# The first 80% of the rows (rounded up) are used for training.
training_data_len = math.ceil(0.8 * len(dataset))

# Scale all of the data to the [0, 1] range.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(dataset)
import math
import pandas_datareader as web
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')

# In[2]:

# Download the daily quote history for the ticker.
df = web.DataReader(
    'HDFCBANK.NS',
    data_source='yahoo',
    start='2000-01-01',
    end='2020-03-31',
)

# In[3]:

# Notebook cell output: the downloaded frame.
df

# In[4]:

# Notebook cell output: (rows, columns) of the data set.
df.shape
# Read the tickers.  Commas are treated as separators: they become spaces
# so that "AAPL,MSFT" yields two tickers rather than the single string
# "AAPLMSFT".
data = input('Enter the stocks here ').upper()
data = data.replace(',', ' ')
stocks = data.split()

# One adjusted-close column per ticker, from 2015 until today.
stock_data = pd.DataFrame()
start_date = '2015-01-01'
end_date = datetime.today().strftime('%Y-%m-%d')

for stock in stocks:
    stock_data[stock] = web.DataReader(stock, data_source='yahoo',
                                       start=start_date,
                                       end=end_date)['Adj Close']

dailyReturns = stock_data.pct_change()

number_of_portfolios = 20
RF = 0

portfolio_returns = []
portfolio_risk = []
sharpe_ratio_port = []
portfolio_weights = []

for portfolio in range(number_of_portfolios):
    # Draw one random weight per stock.
    weights = np.random.random_sample(len(stock_data.columns))
# Determine how the returns of several solar companies correlate, using a
# correlation matrix and a mean/standard-deviation scatter plot.
import datetime
import matplotlib.pyplot as plt
import pandas_datareader as data

start = datetime.datetime(2015, 7, 1)
end = datetime.datetime(2016, 6, 1)

# The 'google' source was withdrawn from pandas-datareader; 'yahoo' is used
# instead.  NOTE(review): confirm all four symbols are still served.
solar_df = data.DataReader(['FSLR', 'TAN', 'RGSE', 'SCTY'],
                           'yahoo', start, end)['Close']
solar_df

# Daily percentage returns per symbol.
rets = solar_df.pct_change()
rets

#plt.scatter(rets.FSLR,rets.TAN)  #better correlated
#plt.scatter(rets.RGSE,rets.SCTY)  #not well correlated

solar_corr = rets.corr()
solar_corr

# Mean return against standard deviation, one labelled point per symbol.
plt.scatter(rets.mean(), rets.std())
plt.xlabel('Expected returns')
plt.ylabel('Standard deviation')
for label, x, y in zip(rets.columns, rets.mean(), rets.std()):
    plt.annotate(label, xy=(x, y), xytext=(-20, 20),
                 textcoords='offset points', ha='right', va='bottom',
                 bbox=dict(boxstyle='round, pad=0.5', fc='yellow',
                           alpha=0.5),
                 arrowprops=dict(arrowstyle='->',
                                 connectionstyle='arc3,rad=0'))
plt.show()
def analisis(instrumento, start, end):
    """Price-versus-RSI divergence strategy, v0.

    Downloads the quotes of one instrument from Yahoo, adds the RSI of the
    close and the moving averages, marks the relative minima of both the close
    and the RSI, and then walks the series looking for a divergence while the
    RSI is oversold: successive price minima that fall while successive RSI
    minima rise, both inside the same 20-day window.  Every bar on which that
    pattern holds is stored as a row of key data, together with the first bar
    of the following 20 whose close is above the 10-period moving average
    (the "corte").  When the most recent corte is less than five days old the
    table is saved to Excel and, if TELEGRAM__ is set, a Telegram message is
    sent.

    Parameters:
    instrumento -- ticker symbol, passed as is to the Yahoo data reader
    start -- first date to download
    end -- last date to download

    Returns:
    None.  Results are printed, written under "deliverables/" and optionally
    sent by Telegram.

    Notes:
    Several variables carry "Max" in their name for historical reasons but
    track relative MINIMA (marker value 1 in the 'marcasMxMn' column).
    """
    # a.- Read the quotes from the web.
    df = web.DataReader(instrumento, 'yahoo', start, end)

    # Show the beginning and the end of the downloaded data.
    print(
        "*************************************************************************** INSTRUMENTO ==>> ",
        instrumento)
    print(df.head())
    print(df.tail())

    # 2.- RSI of the close.
    df['RSI'] = ta.momentum.rsi(df['Close'])

    # 2bis.- Moving averages.
    # NOTE(review): presumably adds the "MA_10" column read below - confirm.
    df = MovingAverage(df, long_=40, short_=10)

    # 2rebis.- Remove the rows with missing values.
    df.dropna(inplace=True)
    df['MiIndice'] = list(range(len(df)))

    # 3.- Plots.  Disabled; flip the switch to inspect close and RSI visually.
    dibujar_graficas = False
    if dibujar_graficas:
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(18, 8))
        ax1.plot(1, 1, df.index, df['Close'], color="red", label='cierre')
        ax2.plot(2, 1, df.index, df['RSI'], color="green", label='rsi')
        ax2.set_xlabel('Sesiones')
        ax1.set_title('Valor Cierre ' + instrumento)
        ax2.set_title('RSI')
        ax1.set_xlim(start, end)
        ax2.set_xlim(start, end)
        # Constant lines at the RSI limits.
        ax2.hlines(30, start, end, colors='yellow', linestyles='solid',
                   label='30')
        ax2.hlines(70, start, end, colors='yellow', linestyles='solid',
                   label='70')
        ax1.legend(['Cierre'], loc=1)
        ax2.legend(['rsi', '30', '70'], loc=2)
        # Plot a marker point.
        ax2.plot(start, 67, '^', color='red')
        plt.show()

    # Calls to the class.
    objetoJuan = Person("Viton", 36)
    objetoJuan.myfunc()

    # Relative maxima / minima of the close and of the RSI.
    dff, peaks, valleys = MAX_min_Relativos_v3(df['Close'])
    dff_RSI, peaks_RSI, valleys_RSI = MAX_min_Relativos_v3(df['RSI'])

    # #################################################
    # STRATEGY
    # 1.- Look for a price-versus-RSI divergence while the RSI is oversold
    #     (< 30).
    # 2.- Divergence: falling price minima and rising RSI minima.
    # 3.- Signal.
    # #################################################

    # Trace values reported with each signal.
    index_de_RSI_menor_que_valor = 'nada'
    valorMax = 0
    valor2MaxDecreciente = 0
    var2Max_RSI = 0
    date2varMax_RSI = 0

    # First / last day of the current price and RSI sequences.  "Today" means
    # "no sequence in progress".
    diaInicioPrecio = dt.datetime.today()
    diaInicioRSI = dt.datetime.today()
    diaFinPrecio = dt.datetime.today()
    diaFinRSI = dt.datetime.today()

    ref_KeyData = 1  # label of the next row of datosRelevantes

    # Positional column numbers, for .iloc access.
    rsi_ = df.columns.get_loc("RSI")
    close_ = df.columns.get_loc("Close")
    ma_ = df.columns.get_loc("MA_10")
    marcaMxMn_ = dff.columns.get_loc("marcasMxMn")
    valorSerie_ = dff.columns.get_loc("serie")

    datosRelevantes = pd.DataFrame(
        columns=('fecha1', 'valorMIN_1', 'fecha2', 'valorMIN_2', 'fechaRSI1',
                 'valorMIN_RSI_1', 'fechaRSI2', 'valorMIN_RSI_2',
                 'fechaValorMAX_1', 'valorMAX_1', 'fechaValorMAX_2',
                 'valorMAX_2'))

    # State of the three small state machines: RSI level, price minima and
    # RSI minima.
    marca = 'buscando_RSI'
    marca2 = 'nada'
    marca3 = 'nada'

    ventana = 20  # days: both sequences must start and end this close
    ventanita = 20  # bars scanned ahead for the close to exceed MA_10

    # The scan starts at row 15, as in the original (reason not documented).
    for i in range(15, len(df)):
        rsi = df.iloc[i, rsi_]

        # ------------------------------------------------------------- RSI
        # RSI below 30: oversold zone found.
        if marca == 'buscando_RSI' and rsi < 30:
            marca = 'RSI_encontrado'
            index_de_RSI_menor_que_valor = df.index[i]
        # RSI above 50: it left the oversold zone, start over.
        if marca == 'RSI_encontrado' and rsi > 50:
            marca = 'buscando_RSI'
            marca2 = 'nada'
            marca3 = 'nada'

        # Nothing else happens outside the oversold state: both sequence
        # flags are 'nada' there, so no signal can be raised.
        if marca != 'RSI_encontrado':
            continue

        # -------------------------------------------- falling price minima
        if dff.iloc[i, marcaMxMn_] == 1:  # marker 1 is a relative minimum
            precio = dff.iloc[i, 0]
            if marca2 == 'nada':
                marca2 = 'primerMax'
                varMax = precio  # reference for the next comparison
                valorMax = precio
                datevalorMax = df.index[i]
                diaInicioPrecio = df.index[i]
            if (marca2 in ('primerMax', 'ultimoMaxDecreciente')
                    and precio < varMax):
                varMax = precio
                marca2 = 'ultimoMaxDecreciente'
                valor2MaxDecreciente = precio
                datevalor2MaxDecreciente = df.index[i]
                diaFinPrecio = df.index[i]
            if (marca2 in ('primerMax', 'ultimoMaxDecreciente')
                    and precio > varMax):
                # Not a falling minimum: drop the sequence.
                marca2 = 'nada'
                diaInicioPrecio = dt.datetime.today()
                diaFinPrecio = dt.datetime.today()

        # ----------------------------------------------- rising RSI minima
        if dff_RSI.iloc[i, marcaMxMn_] == 1:
            minimo_rsi = dff_RSI.iloc[i, 0]
            if marca3 == 'nada':
                marca3 = 'primerMIN_RSI'
                varMax_RSI = minimo_rsi
                valorMax_RSI = minimo_rsi
                datevarMax_RSI = df.index[i]
                diaInicioRSI = df.index[i]
            if (marca3 in ('primerMIN_RSI', 'ultimoMinCreciente_RSI')
                    and minimo_rsi > varMax_RSI):
                varMax_RSI = minimo_rsi
                marca3 = 'ultimoMinCreciente_RSI'
                var2Max_RSI = minimo_rsi
                date2varMax_RSI = df.index[i]
                diaFinRSI = df.index[i]
            if (marca3 in ('primerMIN_RSI', 'ultimoMinCreciente_RSI')
                    and minimo_rsi < varMax_RSI):
                # Not a rising minimum: drop the sequence.
                marca3 = 'nada'
                diaInicioRSI = dt.datetime.today()
                diaFinRSI = dt.datetime.today()

        # J3: the process above, which computes rising/falling sequences,
        # loses the first value when the trend changes.  When the flag is
        # cleared, the point that caused it is not taken as the first one of
        # the sequence in the opposite direction, so THREE points are needed
        # to establish a falling/rising sequence.

        # ------------------------- both sequences in the same time window
        intervaloValido = (
            (dt.timedelta(days=ventana) > abs(diaInicioPrecio - diaInicioRSI))
            and (dt.timedelta(days=ventana) > abs(diaFinPrecio - diaFinRSI)))

        if not (intervaloValido and marca2 == 'ultimoMaxDecreciente'
                and marca3 == 'ultimoMinCreciente_RSI'):
            continue

        # --------------------------------------------- entry point (corte)
        # With the RSI oversold and the divergence in place, wait for the
        # price to break above MA_10 in the immediate future.  (An earlier
        # approach based on the trend of the price maxima was dropped in
        # favour of this one.)  The values default to "not found" so that a
        # window without a crossing never reuses a previous signal's corte.
        valor_2 = float('nan')
        fechaValor_2 = pd.NaT
        for jj in range(i, min(i + ventanita, len(df))):
            if df.iloc[jj, close_] > df.iloc[jj, ma_]:
                valor_2 = dff.iloc[jj, 0]
                fechaValor_2 = df.index[jj]
                break

        # ------------------------------------------- report and key data
        print("señal en", i)
        print('index_de_RSI_menor_que_valor', index_de_RSI_menor_que_valor)
        print('valorMax', valorMax, 'en fecha', datevalorMax)
        print('valor2MaxDecreciente', valor2MaxDecreciente, 'en fecha',
              datevalor2MaxDecreciente)
        print('valorMin RSI', valorMax_RSI, 'en fecha', datevarMax_RSI)
        print('valor2MinCrecietne RSI', var2Max_RSI, 'en fecha',
              date2varMax_RSI)
        print('fecha', df.index[i])

        datosRelevantes.loc[ref_KeyData, 'valorMIN_1'] = valorMax
        datosRelevantes.loc[ref_KeyData, 'valorMIN_2'] = valor2MaxDecreciente
        datosRelevantes.loc[ref_KeyData, 'fecha1'] = datevalorMax
        datosRelevantes.loc[ref_KeyData, 'fecha2'] = datevalor2MaxDecreciente
        datosRelevantes.loc[ref_KeyData, 'valorMIN_RSI_1'] = valorMax_RSI
        datosRelevantes.loc[ref_KeyData, 'valorMIN_RSI_2'] = var2Max_RSI
        datosRelevantes.loc[ref_KeyData, 'fechaRSI1'] = datevarMax_RSI
        datosRelevantes.loc[ref_KeyData, 'fechaRSI2'] = date2varMax_RSI
        # The falling maxima are no longer exported; the MA corte is.
        datosRelevantes.loc[ref_KeyData, 'valorCORTE'] = valor_2
        datosRelevantes.loc[ref_KeyData, 'fechaValorCORTE'] = fechaValor_2
        ref_KeyData = ref_KeyData + 1

    # Sort from most recent to oldest.  Skipped when there is no key data.
    if not datosRelevantes.empty:
        datosRelevantes.sort_values(by=['fechaValorCORTE'],
                                    axis=0,
                                    ascending=False,
                                    inplace=True)
        datosRelevantes.reset_index(inplace=True)
        print(datosRelevantes.head())
        print(datosRelevantes.tail())

        # Ready for real-time use: the corte must have happened during the
        # last 5 days.  A missing corte (NaT) never compares as recent.
        fechaAntiguedad = dt.datetime.today() - dt.timedelta(days=5)
        if datosRelevantes.loc[0, 'fechaValorCORTE'] > fechaAntiguedad:
            salvarExcel(datosRelevantes,
                        "deliverables/" + instrumento + "_señal_PRECIO_RSI")
            if TELEGRAM__:
                telegram_send("**** Señal encontrada para el instrumento " +
                              instrumento + " divergencia Precio_RSI")
print('Actual Price for 1 day out:', actual['Close'], '\n') # Display figure webbrowser.open('figs/%s.png' % ticker) elif (modelType == '2'): ticker = str(input("Enter Stock Ticker Name: ")) date = str(input("Enter Date Before 2020-10-01 (yyyy-mm-dd): ")) # LSTM # Load in LSTM model lstm = load_model('lstm_models/' + ticker + '.h5') # Pull stock data for ticker df = web.DataReader(ticker, data_source='yahoo', start='2015-01-01', end='2020-10-01') data = df['Adj Close'] # Reshape data dataset = np.array(data.values) dataset = np.reshape(dataset, (-1, 1)) scaler = MinMaxScaler(feature_range=(0, 1)) scaled_data = scaler.fit_transform(dataset) # Get index of specified date df.reset_index(inplace=True, drop=False) current_row = df.loc[df['Date'] == date] index = current_row.index.tolist() # Get training data from scaled
from finta import TA
import pandas as pd
from pylab import rcParams
import matplotlib.pyplot as plt

# Look-back window: the last 360 days up to today.
start = datetime.datetime.now() - datetime.timedelta(days=360)
end = datetime.date.today()

# Settings.  `show_price` controls whether the close is drawn together with
# the indicator (and labelled in `indicators`).
show_price = True
stock = 'NIO'
indicators = ['RSI']

# Normalise the settings to upper case.
stock = stock.upper()
indicators = [x.upper() for x in indicators]

data = pdr.DataReader(stock, 'yahoo', start, end)

# Plain-list copies of every downloaded column.
opens = data.Open.tolist()
highs = data.High.tolist()
lows = data.Low.tolist()
closes = data.Close.tolist()
volumes = data.Volume.tolist()

# Same data under the lower-case column names passed to finta, on the
# original date index.
ohlc = pd.DataFrame(
    {
        "open": opens,
        "high": highs,
        "low": lows,
        "close": closes,
        "volume": volumes,
    },
    index=data.index,
)

# The flag governs the price line and its label together, so the entries of
# `indicators` always match the lines drawn; the RSI is drawn regardless.
if show_price:
    plt.plot(ohlc.close)
    indicators.insert(0, 'Close Price')
plt.plot(TA.RSI(ohlc))
def load_rates(freq='D'):
    """Load interest rates from https://fred.stlouisfed.org/.

    Requests five groups of series (nominal Treasuries, TIPS, financial
    commercial paper, non-financial commercial paper and short rates) in a
    single ``pdr.DataReader(..., 'fred')`` call and relabels the columns
    with a two-level index (group, tenor or rate name).

    Parameters
    ----------
    freq : str {'D', 'W', 'M'}, default 'D'
        Frequency of time series; daily, weekly, or monthly.  Lower-case
        letters are accepted as well.

    Returns
    -------
    DataFrame
        The frame returned by the data reader, with ``columns`` replaced by
        a ``MultiIndex`` whose first level is one of 'Nominal', 'TIPS',
        'Fncl CP', 'Non-Fncl CP', 'Short Rates' and whose second level is
        the tenor ('1m', '10y', ...) or the rate name.

    Raises
    ------
    KeyError
        If `freq` is not one of 'D', 'W', 'M'.

    Notes
    -----
    The start date is not a parameter: the module-level ``DSTART`` is used.
    The labels are assigned by position, so they rely on the reader
    returning the columns in the order in which they were requested.

    Original source
    ---------------
    Board of Governors of the Federal Reserve System
    H.15 Selected Interest Rates
    https://www.federalreserve.gov/releases/h15/
    """
    if isinstance(freq, str):
        freq = freq.upper()

    months = [1, 3, 6]
    years = [1, 2, 3, 5, 7, 10, 20, 30]
    tips_years = years[3:7]  # TIPS tenors: 5, 7, 10 and 20 years

    # Nested dictionaries of symbols from fred.stlouisfed.org
    nom = {
        'D': ['DGS%sMO' % m for m in months] + ['DGS%s' % y for y in years],
        'W': ['WGS%sMO' % m for m in months] + ['WGS%sYR' % y for y in years],
        'M': ['GS%sM' % m for m in months] + ['GS%s' % y for y in years]
    }
    tips = {
        'D': ['DFII%s' % y for y in tips_years],
        'W': ['WFII%s' % y for y in tips_years],
        'M': ['FII%s' % y for y in tips_years]
    }
    fcp = {
        'D': ['DCPF1M', 'DCPF2M', 'DCPF3M'],
        'W': ['WCPF1M', 'WCPF2M', 'WCPF3M'],
        'M': ['CPF1M', 'CPF2M', 'CPF3M']
    }
    nfcp = {
        'D': ['DCPN30', 'DCPN2M', 'DCPN3M'],
        'W': ['WCPN1M', 'WCPN2M', 'WCPN3M'],
        'M': ['CPN1M', 'CPN2M', 'CPN3M']
    }
    short = {
        'D': ['DFF', 'DPRIME', 'DPCREDIT'],
        'W': ['FF', 'WPRIME', 'WPCREDIT'],
        'M': ['FEDFUNDS', 'MPRIME', 'MPCREDIT']
    }

    if freq not in nom:
        raise KeyError("freq must be one of 'D', 'W', 'M'; got %r" % (freq,))

    cp_tenors = ['%sm' % m for m in range(1, 4)]
    # (group label, symbols to request, second-level label of each symbol).
    # Keeping the three together lets both column levels be derived from the
    # symbol lists instead of hard-coded counts.
    groups = [
        ('Nominal', nom[freq],
         ['%sm' % m for m in months] + ['%sy' % y for y in years]),
        ('TIPS', tips[freq], ['%sy' % y for y in tips_years]),
        ('Fncl CP', fcp[freq], cp_tenors),
        ('Non-Fncl CP', nfcp[freq], cp_tenors),
        ('Short Rates', short[freq],
         ['Fed Funds', 'Prime Rate', 'Primary Credit']),
    ]

    symbols = [symbol for _, group, _ in groups for symbol in group]
    l1 = [name for name, group, _ in groups for _ in group]
    l2 = [label for _, _, labels in groups for label in labels]

    rates = pdr.DataReader(symbols, 'fred', start=DSTART)
    rates.columns = pd.MultiIndex.from_arrays([l1, l2])

    return rates
def get_raw_prices(self):
    """Load the raw prices into ``self.dt_raw_prices`` and ``self.dt_select``.

    The source is chosen by ``self.fetch_data``:

    * ``'pdr'``: when ``self.new_db_raw_prices`` is true the prices are
      downloaded from Yahoo with pandas-datareader and stored in
      ``dt_raw_prices_pdr.db``; in every case the table is then read back
      from that SQLite file.
    * ``'alphav'``: when ``self.new_db_alphav_prices`` is true the prices of
      a fixed list of symbols are downloaded from Alpha Vantage (API key
      taken from the ``ALPHAVANTAGE_API_KEY`` environment variable) and
      stored in ``dt_raw_prices_alv.db``; in every case the table is then
      read back from that SQLite file.

    Any other value of ``self.fetch_data`` leaves the object untouched.

    Besides the two frames, the method sets ``self.attrs``, ``self.symbs``
    and ``self.midx`` (the column ``MultiIndex`` applied to the table read
    from the database).

    Raises:
        ValueError: for ``'alphav'`` when ``self.freq`` is not one of
            'monthly', 'daily' or 'intraday'.
    """
    if self.fetch_data == 'pdr':
        self.attrs = ['Adj Close', 'Close', 'High', 'Open', 'Low', 'Volume']
        self.symbs = self.tickers_names.index
        self.midx = pd.MultiIndex.from_product([self.attrs, self.symbs],
                                               names=('Attributes',
                                                      'Symbols'))
        engine = create_engine("sqlite:///" + self.output_directory +
                               "/dt_raw_prices_pdr.db",
                               echo=False)

        if self.new_db_raw_prices:
            # Refresh the database with a new download.
            dt_raw_prices = pdr.DataReader(self.tickers_names, 'yahoo',
                                           self.start_date, self.end_date)
            dt_raw_prices['Dates'] = pd.to_datetime(dt_raw_prices.index,
                                                    format='%Y-%m-%d')
            dt_raw_prices.to_sql('dt_raw_prices',
                                 engine,
                                 if_exists='replace',
                                 index=False)

        # Always work from what is stored in the database.
        self.dt_raw_prices = pd.read_sql_table(
            'dt_raw_prices',
            con=engine,
            parse_dates={'Dates': {
                'format': '%Y-%m-%d'
            }})
        # NOTE(review): the date column presumably comes back named after the
        # string form of its tuple label - confirm against the stored table.
        self.dt_raw_prices.rename(columns={"('Dates', '')": 'Dates'},
                                  inplace=True)
        self.dt_raw_prices.set_index('Dates', inplace=True)
        self.dt_raw_prices.columns = self.midx
        self.dt_select = self.dt_raw_prices[[self.ohlc, 'Volume']]

    elif self.fetch_data == 'alphav':
        self.symbs = [
            'NVDA', 'AMZN', 'TSLA', 'MRNA', 'AAPL'
        ]  # Helper.nasdaq100_tickers(self.url_nasdaq).index.to_list()

        # Column names assigned to each download, per frequency.
        attrs_by_freq = {
            'monthly': [
                'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume',
                'Dividend'
            ],
            'daily': [
                'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume',
                'Dividend', 'Split coeff'
            ],
            'intraday': ['Open', 'High', 'Low', 'Close', 'Volume'],
        }
        if self.freq not in attrs_by_freq:
            raise ValueError(
                "freq must be 'monthly', 'daily' or 'intraday'; got %r" %
                (self.freq,))
        self.attrs = attrs_by_freq[self.freq]
        self.midx = pd.MultiIndex.from_product([self.symbs, self.attrs],
                                               names=('Symbols',
                                                      'Attributes'))
        engine = create_engine("sqlite:///" + self.output_directory +
                               "/dt_raw_prices_alv.db",
                               echo=False)

        if self.new_db_alphav_prices:
            api_key = os.getenv('ALPHAVANTAGE_API_KEY')
            ts = TimeSeries(key=api_key, output_format='pandas')

            # One download call per frequency; each returns (data, metadata).
            if self.freq == 'monthly':
                fetch, extra = ts.get_monthly_adjusted, {}
            elif self.freq == 'daily':
                fetch, extra = ts.get_daily_adjusted, {}
            else:  # 'intraday'
                fetch = ts.get_intraday
                extra = {'interval': '60min', 'outputsize': 'compact'}

            data_all = []
            for el in self.symbs:
                data, _meta_data = fetch(symbol=el, **extra)
                data.columns = self.attrs
                data_all.append(data)

            self.dt_raw_prices = pd.concat(data_all, axis=1)
            self.dt_raw_prices.columns = self.midx
            # Reverse the row order of the downloaded table.
            self.dt_raw_prices = self.dt_raw_prices[::-1]
            self.dt_raw_prices['Dates'] = pd.to_datetime(
                self.dt_raw_prices.index, format='%Y-%m-%d')
            self.dt_raw_prices.to_sql('dt_raw_prices',
                                      engine,
                                      if_exists='replace',
                                      index=False)

        # Always work from what is stored in the database.
        self.dt_raw_prices = pd.read_sql_table(
            'dt_raw_prices',
            con=engine,
            parse_dates={'Dates': {
                'format': '%Y-%m-%d'
            }})
        self.dt_raw_prices.rename(columns={"('Dates', '')": 'Dates'},
                                  inplace=True)
        self.dt_raw_prices.set_index('Dates', inplace=True)
        self.dt_raw_prices.columns = self.midx
        self.dt_raw_prices.index = pd.to_datetime(self.dt_raw_prices.index)
        # Put the attribute on the first column level and sort the columns.
        # `axis` is passed by keyword: recent pandas releases no longer
        # accept it positionally in sort_index.
        self.dt_raw_prices = self.dt_raw_prices.swaplevel(
            0, 1, axis=1).sort_index(axis=1)

        # Select the columns based on the ohlc parameter; intraday data has
        # no adjusted close, so 'Close' is used there.
        if self.freq in ('monthly', 'daily'):
            wanted = [self.ohlc] + ['Volume']
        else:  # 'intraday'
            wanted = ['Close', 'Volume']
        attributes = self.dt_raw_prices.columns.get_level_values(0)
        self.dt_select = self.dt_raw_prices.loc[:, attributes.isin(wanted)]
import matplotlib.pyplot as plt
import mplfinance as mpf
import datetime as dt

crypto = "BTC"
currency = "USD"

# Quotes are requested from 2020-01-01 up to the moment the script runs.
start = dt.datetime(2020, 1, 1)
end = dt.datetime.now()

# Pandas DataReader data source options
# yahoo, google, fred (St. Louis Fed), famafrench (Kenneth French's data library)
# World bank
# https://pandas.pydata.org/pandas-docs/version/0.18.1/remote_data.html#remote-data-fred
data = web.DataReader(f"{crypto}-{currency}", "yahoo", start, end)

# For creating multiple currency charts.  `btc` is the same request as `data`,
# so the download is reused (as an independent copy) instead of repeated.
btc = data.copy()
eth = web.DataReader(f"ETH-{currency}", "yahoo", start, end)

# Make the chart logarithmic.
plt.yscale("log")

# print(data)
# basic line chart on Close
# plt.plot(data['Close'])
# plt.show()
# plot candlestick