def ETF_describe_window(frame, ticker='SPY', start=None, end=None): """ Display a comprehensive analysis in windows window """ import tkinter as tk from invest.get_data import get_latest_ETF from invest.calculation import get_alpha_beta, get_return_vol from invest.calculation import add_dividend, get_dividend_yield from time_series.functions import resample from gui.tkinter_widget import display_dataframe for widget in frame.winfo_children(): widget.destroy() data = get_latest_ETF(ticker, start=start) if end is not None: data = data[:end] tk.Label(frame, text=ticker, font=("Helvetica", 14))\ .grid(row=1, column=1, padx=10, pady=5) tk.Label(frame, text="from \n{} \nto \n{}".format(data.index[0].date(), data.index[-1].date()) ).grid(row=1, column=2, padx=10, pady=5) df = np.round( data.iloc[-1,:-1].to_frame().T, 2) df.index = [x.date() for x in df.index] df.index.name = "Today:" display_dataframe(frame, df).grid(row=3, column=1, columnspan=2, padx=2, pady=2) # Alpha and Beta are calculated by using SPY as market and zero risk-free rate # Weekly close price is used market = get_latest_ETF('SPY') wm = resample(market, style="week", method='close') weekly = resample(data, style="week", method='close') a, b = get_alpha_beta(weekly.Close, wm.Close, risk_free=0, scale=52, dspl=False) tk.Label(frame, text="Alpha: {}\nBeta: {}".format(np.round(a, 4), np.round(b, 4)))\ .grid(row=4, column=1, padx=2, pady=2) # Return and volatility are calculated by using weekly close price rv = get_return_vol(weekly.Close, scale=52) tk.Label(frame, text="Return: {:.2%}\nVolatility: {:.2%}".format( rv.Return[0], rv.Volatility[0])).grid(row=4, column=2, padx=2, pady=2) add_dividend(data, price='Close', adj='Adj Close', out='Dividend') div = get_dividend_yield(data, price='Close', div='Dividend', style='simple').rename("Dividend Yield (%)") div = np.round(div[-5:].to_frame()*100, 2) for c in div.index: div.at[c,'Payment No.'] = np.sum(data[data.Dividend!=0].index.year==c) div.index.name = "Year" display_dataframe(frame, div).grid(row=5, column=1, padx=2, pady=2) divs = data[data.Dividend>0].Dividend[-5:].rename("Dividend per share").to_frame() divs.index = [d.date() for d in divs.index] divs.index.name = "Date" display_dataframe(frame, divs).grid(row=5, column=2, padx=2, pady=2) return frame
def find_allocation(tickers, start='2015-01-01', end=None, column='Adj Close'): """ Find the allocation with the highest Sharpe ratio. input: tickers tickers of the stocks/ETFs start start date, default '2015-01-01' end end date, default current column the column of data to use, default 'Adj Close' Return a pandas Serires of allocations """ from invest.get_data import read_portfolio from invest.calculation import minimize_risk from time_series.functions import resample from basic.plot import plot_stacked_bar data = read_portfolio(tickers, column, start, end) weekly = resample(data, column, style="week", method='close') rv = minimize_risk(weekly, returns=None, strict=True, riskfree=None, max_alloc=1, scale=52, ret=False, verbose=True, plotit=True) plt.figure(figsize=(14, 3)) plot_stacked_bar(rv[rv.columns[-3::-1]].T.values, names=rv.columns[-3::-1], ticks=np.round(rv.index * 100, 2)) plt.xlabel("Return in %") plt.show() sharpe = rv.Return / rv.Volatility arg = sharpe.argmax() return rv.loc[arg, :][:-2]
def investment_performance(ticker, price, quantity, start, end=None): """ Calculate performance of given stock/ETF. input: ticker ticker of the stock/ETF start start date end end date, default current """ from invest.useful import convert_time from invest.get_data import read_ETF from invest.plot import plot_day_price from invest.calculation import add_dividend, get_return_vol from time_series.functions import resample start, end = convert_time(start, end) data = read_ETF(ticker)[start:end] plot_day_price(data) plt.plot([start], [price], 'ro') plt.show() add_dividend(data, price='Close', adj='Adj Close', out='Dividend') temp = data[['Dividend']][data.Dividend != 0].copy() temp.index = temp.index.date display(temp.T) weekly = resample(data, style='week', method='close') rv = get_return_vol(weekly[['Close', 'Adj Close']], scale=52, ret=False, plotit=False) rv['Total Return'] = data[['Close', 'Adj Close']].iloc[-1, :] / data[ ['Close', 'Adj Close']].iloc[0, :] - 1 rv = rv * 100 rv['Gain'] = np.round(price * quantity * rv['Total Return'] / 100, 2) display(rv) print("Actual gain without reinvest: {:.2f}".format( (data.Close[-1] - price) * quantity)) print("Dividend gain: {:.2f}".format(data.Dividend.sum() * quantity))
def show_yield(ticker, start=None, end=None, weeks=52): """ Calculate annualized return. Simple return is calculated input: ticker ticker of the stock/ETF start start date, default '2000-01-01' end end date, default current weeks number of weeks for each calculation, default 52 Return a DataFrame with three rows: Adj Close, Dividend, Close """ from invest.useful import convert_time from invest.get_data import read_ETF from invest.calculation import add_dividend, get_returns from time_series.functions import resample start, end = convert_time(start, end) data = read_ETF(ticker)[start:end] add_dividend(data, price='Close', adj='Adj Close', out='Dividend') data['Dividend'] = np.cumsum(data.Dividend) weekly = resample(data, style='week', method='close') weekly = weekly[(weekly.shape[0] - 1) % weeks::weeks] df = get_returns(weekly[['Adj Close', 'Dividend', 'Close']], 'simple') df['Dividend'] = (weekly.Dividend.diff() / weekly.Close.shift(1)) df = df * 100 * 52 / weeks from datetime import timedelta ds = df.index xlim = [ ds[0] - timedelta(days=3 * weeks), ds[-1] + timedelta(days=3 * weeks) ] plt.figure(figsize=(14, 3)) plt.title("Annualized Return") plt.hlines(xmin=xlim[0], xmax=xlim[1], y=0) plt.hlines(xmin=xlim[0], xmax=xlim[1], y=df['Adj Close'].mean(), linestyle='--', color='#1f77b4') plt.hlines(xmin=xlim[0], xmax=xlim[1], y=df.Dividend.mean(), linestyle='--', color='#ff7f0e') plt.bar(ds, df.Close, width=5 * weeks, label='Yield') plt.bar(ds, df.Dividend, bottom=df.Close, width=5 * weeks, label='Div_Yield') plt.plot(ds, df['Adj Close'], 'o-', label='Adj_Yield') plt.xlabel("Date to sell") plt.xlim(xlim) plt.ylim([np.min(df.values) - 0.2, np.max(df.values) + 0.2]) plt.legend(bbox_to_anchor=(1.01, 0.9), loc='upper left') plt.grid() df.index = df.index.date print(np.round(df, 2)) plt.show() return np.round(df, 2).T
def get_dividend_yield(data, price='Close', div='Dividend', style='simple'): """ Calculate annual yield of dividend with price and dividend/adjusted price. input: data a pandas DataFrame with at least two columns price the column name of original prices, default 'Close' div the column name of dividend, default 'Dividend' style calculation method: 'simple' (default), or 'log' Either adj or div should be not None. Return a Series with dividend yield at year end. """ logger = logging.getLogger(__name__) from time_series.functions import resample rsp = resample(data[price], style='year', method='close') div = resample(data[div], style='year', method='sum') ret = (div / rsp.shift(1)).iloc[1:].fillna(0) ret.index = ret.index.year if style == 'simple': return ret if style == 'log': return np.log(1 + ret)
def show_trend(ticker, start=None, end=None): """ Plot price change, return and volatility input: ticker ticker of the stock/ETF start start date, default '2000-01-01' end end date, default current Return correlation between return and volatility """ from invest.useful import convert_time from invest.get_data import read_ETF from invest.calculation import get_returns from time_series.functions import moving_agg, resample from basic.mathe import covariance, correlation start, end = convert_time(start, end) data = read_ETF(ticker)[start:end] fig = plt.figure(figsize=(14,6)) fig.add_axes([0.05,0.68,0.94,0.3]) for c in ['Close','Adj Close']: plt.plot(data.index, data[c], label=c) plt.xlim(data.index[0],data.index[-1]) plt.xticks([]) plt.ylabel("Price ($)") plt.legend(loc='best') weekly = resample(data, style='week', method='close') df = get_returns(weekly.Close, 'simple') fig.add_axes([0.05,0.38,0.94,0.3]) m = moving_agg(df, window=52, step=1, func=np.sum) plt.plot(df.index[51:], m*100) plt.hlines(xmin=data.index[0], xmax=data.index[-1], y=0, linestyle='--') plt.xlim(data.index[0],data.index[-1]) plt.xticks([]) plt.ylabel("Annual Return (%)") plt.legend(loc='best') fig.add_axes([0.05,0.08,0.94,0.3]) v = moving_agg(df, window=52, step=1, func=covariance) v = np.sqrt(v*52) plt.plot(df.index[51:], v*100) plt.xlim(data.index[0],data.index[-1]) plt.ylabel("Volatility (%)") plt.gca().set_ylim(bottom=0) plt.legend(loc='best') corr = correlation(m, v) print("Correlation between return and volatility:", corr) plt.show() return corr
def generate(tickers, features): """ input: tickers a list of tickers of stocks/ETFs features a list of feature names to be calculated Return a pandas DataFrame """ from invest.get_data import read_ETF from time_series.functions import resample from invest.calculation import get_returns data = pd.DataFrame() for t in tickers: etf = read_ETF(t) weekly = resample(etf, style='week', method='close') ret = get_returns(weekly, style='simple') rv = get_return_vol(ret, scale=52, ret=True, plotit=False)
def set_benchmark(self): ticker = self.benchmark.get().upper() if ticker == "": self._logger_.info("Remove benchmark") self._data_['benchmark'] = 0 else: self._logger_.info("Set benchmark as {}".format(ticker)) try: data = read_ETF(ticker, file_dir=path + "\\data_temp") except: self._logger_.error("Cannot load benchmark {}".format(ticker)) else: self._data_['benchmark'] = get_returns(resample( data.Close, column=None, style="week", method='close'), style='simple', fillna=False) self.update_plot()
def initial_plot(self, column='Adj Close', style='week', start='2015-1-1'): self._logger_.debug("Initialize plots for portfolio window") tickers = self.select.get_right() self._data_ = get_returns(resample(read_portfolio(tickers, column=column, start=start), column=None, style=style, method='close'), style='simple', fillna=False) self._data_['benchmark'] = 0 data = self._data_.iloc[-52:, :-1].dropna(axis=1, how='any') rv = get_return_vol(pd.concat([data * 3, -data], axis=1), scale=52, ret=True, plotit=False) fig = return_vol(rv.Return, rv.Volatility, rv.index) fig.axes[0].plot([0], [0], 'r*') return fig, pie_plot([10, 6], labels=['a', 'b'])
def Sharpe_filter(tickers, keep=50, start='2015-01-01', end=None, column='Adj Close'): """ Find stocks/ETFs with high Sharpe ratio. input: tickers tickers of the stocks/ETFs keep number of stocks/ETFs to keep start start date, default '2015-01-01' end end date, default current column the column of data to use, default 'Adj Close' Return a list of tickers """ from invest.get_data import read_portfolio from invest.calculation import get_return_vol from time_series.functions import resample data = read_portfolio(tickers, column, start, end) weekly = resample(data, column, style="week", method='close') rv = get_return_vol(weekly, scale=52, ret=False, plotit=False) rv['Sharpe'] = rv.Return / rv.Volatility rv = rv.sort_values('Sharpe')[::-1] return rv[:keep].index.values
def holdings_performance(file, frame=None): """ Calculate performance of current holdings. input: file a csv file containing all transactions columns are Date, Ticker, Shares, Price frame a tkinter frame to show results. Default None """ logger = logging.getLogger(__name__) try: holdings = pd.read_csv(file, index_col=None, parse_dates=[0]).dropna() except Exception as err: logger.error("Cannot open holdings file {}".format(file)) logger.error(err) return logger.debug("input transactions:") logger.debug("{} \t{}\t{}\t{}".format(*holdings.columns)) for i in range(holdings.shape[0]): logger.debug("{} \t{} \t{} \t{}".format(holdings.iloc[i, 0].date(), *holdings.iloc[i, 1:])) tickers = holdings.Ticker.unique() if len(tickers) == 0: logger.info("No data provided") return from invest.get_data import get_latest_ETFs first_date = holdings.Date.min() - timedelta(days=7) data = get_latest_ETFs(tickers, start=first_date) from invest.calculation import add_dividend for t in tickers: add_dividend(data, price=('Close', t), adj=('Adj Close', t), out=('Dividend', t)) columns = [ "Ticker", "Buy Date", "Buy Price", "Current Price", "Buy Shares", "Reinvested Shares", "Capital Gain", "Dividend Gain", "Total Gain" ] output = pd.DataFrame(columns=columns, index=holdings.index) output['Ticker'] = holdings.Ticker.values output['Buy Date'] = holdings.Date.dt.date.values output['Buy Price'] = holdings.Price.values for t in tickers: index = output.index[output.Ticker == t] output.loc[index, 'Current Price'] = data['Close'][t][-1] # calculate reinvested value # settlement date is two business days after buy date dates = holdings.Date[index] loc = np.array([data.index.get_loc(x) for x in dates]) + 2 loc = np.clip(loc, 0, data.shape[0] - 1) output.loc[index, 'Close'] = data['Close'][t].iloc[loc].values output.loc[index, 'Adj Close'] = data['Adj Close'][t].iloc[loc].values output.loc[index, 'Dividend'] = [ data.Dividend[t].iloc[d:].sum() for d in loc ] output['Current Price'] = output['Current Price'].astype(float) output['Buy Shares'] = holdings.Shares.values current_share = output['Close'] / output['Adj Close'] * output['Buy Shares'] output['Reinvested Shares'] = np.round( current_share - output['Buy Shares'], 5) output['Value'] = current_share * output['Current Price'] days = (date.today() - output['Buy Date']).dt.days / 365 output['Capital Gain'] = (output['Current Price'] - output['Buy Price']) * output['Buy Shares'] output['Capital Gain %'] = (output['Current Price'] / output['Buy Price'] - 1) / days * 100 output['Dividend Gain'] = output['Dividend'] * output['Buy Shares'] output['Dividend Gain %'] = output['Dividend'] / output[ 'Buy Price'] / days * 100 output['Total Gain'] = output['Current Price'] * current_share - output[ 'Buy Price'] * output['Buy Shares'] out = file.replace('/', '\\').split('\\')[:-1] out.append("output.csv") out = '\\'.join(out) logger.info("Detailed investment summary saved in file {}".format(out)) output.to_csv(out, index=False) summary = output.groupby('Ticker')['Value', 'Capital Gain', 'Dividend Gain', 'Total Gain'].sum() summary['Dividend Gain'] = np.round( summary['Total Gain'] - summary['Capital Gain'], 2) summary['Value'] = np.round(summary['Value'], 2) summary['Capital Gain'] = np.round(summary['Capital Gain'], 2) summary['Total Gain'] = np.round(summary['Total Gain'], 2) from time_series.functions import resample from invest.calculation import get_returns weekly = resample(data, style="week", method='close') dates = list(np.sort(holdings.Date.unique())) dates.append(date.today()) # dates with transactions rets = pd.Series() # weekly returns of total investments vals = pd.Series() # total value of investments perf = pd.Series() # performance, i.e. current value / invested value max_value = 0 for d in range(len(dates) - 1): tickers = holdings.Ticker[holdings.Date <= dates[d]] from_date = dates[d] - np.timedelta64(7, 'D') to_date = dates[d + 1] shares = holdings.Shares[holdings.Date <= dates[d]] price = weekly['Close'][tickers][from_date:to_date] value = pd.Series(price.values.dot(shares.values), index=price.index) rets = rets.append(get_returns(value)) price = data['Close'][tickers][dates[d]:dates[d + 1]] value = pd.Series(price.values.dot(shares.values), index=price.index) vals = vals.append(value) buys = holdings.Price[holdings.Date <= dates[d]] perf = perf.append(value / np.dot(buys.values, shares.values)) xmin, xmax = vals.index[0], vals.index[-1] plt.figure(figsize=(15, 5)) plt.plot(rets.index, rets * 100, color='#1f77b4') plt.hlines(y=0, xmin=xmin, xmax=xmax, linestyles='--', color='#1f77b4') plt.ylabel("Weekly Return (%)", fontsize=20, color='#1f77b4') plt.yticks(fontsize=14, color='#1f77b4') plt.legend() plt.twinx() plt.plot(perf.index, (perf - 1) * 100, color='#ff7f0e') plt.hlines(y=0, xmin=xmin, xmax=xmax, linestyles='--', color='#ff7f0e') # plt.gca().set_ylim(bottom=0) plt.ylabel("Total Return (%)", fontsize=20, color='#ff7f0e') plt.yticks(fontsize=14, color='#ff7f0e') plt.xlim(xmin, xmax) from gui.tkinter_widget import display_dataframe, plot_embed_toolbar import tkinter as tk if frame is None: root = tk.Tk() else: root = frame plot_embed_toolbar(root, fig=plt.gcf()).grid(row=0, column=0, columnspan=2) tk.Label(root, text="""Notes: \n 'Annual Return' is just the rescaled weekly return. 'Performance' is the total return percentage of the day (will be affected by buy/sell activities).\n """).grid(row=1, column=0, padx=10, pady=10) display_dataframe(root, summary).grid(row=1, column=1, padx=10, pady=10) total_value = output['Value'].sum() total_invest = (output['Buy Shares'] * output['Buy Price']).sum() total_gain = total_value - total_invest tk.Label(root, font='bold', text="total {:.2f} / {:.2f} = {:.2%}"\ .format(total_gain, total_invest, total_gain/total_invest))\ .grid(row=2, column=1, padx=10, pady=10) if frame is None: root.mainloop()
def portfolio_analysis(tickers, alloc=None, start='2010-01-01', end=None): """ Given a series of tickers, return a summery of each ETF in a DataFrame input: tickers tickers of stocks/ETFs alloc allocation for given stocks/TEFs, default None start start date, default "2010-01-01" end end date, default today """ from invest.get_data import read_ETF, read_portfolio from invest.calculation import get_return_vol, get_alpha_beta, minimize_risk from invest.useful import convert_time from time_series.functions import resample from basic.useful import progress_bar from basic.plot import plot_correlation, plot_stacked_bar start, end = convert_time(start, end) if not (alloc is None): if len(tickers) != len(alloc): raise ValueError( "Length of shares and tickers should be the same if shares is given." ) alloc = np.array(alloc) / np.sum(alloc) plt.figure(figsize=(15, 3)) plt.subplot(131) plt.title("Calculated by Close Price") close = read_portfolio(tickers, 'Close', start, end) close = close / close.iloc[0, :] weekly = resample(close, style="week", method='close') al_clo = minimize_risk(weekly, returns=None, strict=True, riskfree=None, max_alloc=1, scale=52, ret=False, verbose=False, plotit=True) if not (alloc is None): weekly = weekly.dot(alloc) rv = get_return_vol(weekly, scale=52) * 100 plt.plot(rv.Volatility, rv.Return, 'bo') plt.subplot(132) plt.title("Calculated by Adjusted Close Price") adj = read_portfolio(tickers, 'Adj Close', start, end) adj = adj / adj.iloc[0, :] weekly = resample(adj, style="week", method='close') al_adj = minimize_risk(weekly, returns=None, strict=True, riskfree=None, max_alloc=1, scale=52, ret=False, verbose=False, plotit=True) if not (alloc is None): weekly = weekly.dot(alloc) rv = get_return_vol(weekly, scale=52) * 100 plt.plot(rv.Volatility, rv.Return, 'bo') plt.subplot(133) df = pd.DataFrame() def func(t): data = read_ETF(t) if data.index[0] < start: data = data[start:end] a, b = get_alpha_beta(data.Close, ret_type='simple', dspl=False) df.at[t, 'alpha'] = a df.at[t, 'beta'] = b progress_bar(tickers, func, disable=True) plt.plot(df.beta, df.alpha, 'o') if not (alloc is None): total = close.dot(alloc) alpha, beta = get_alpha_beta(total, ret_type='simple', dspl=False) plt.plot(beta, alpha, 'bo') plt.xlabel('Beta') plt.ylabel("Alpha") plt.hlines(xmin=-0.1, xmax=1.2, y=0, linestyles='--') plt.xlim(-0.1, 1.2) for t in df.index: plt.text(df.beta[t], df.alpha[t], t) plt.figure(figsize=(14, 3)) plt.title("Allocation calculated from close price") plot_stacked_bar(al_clo[al_clo.columns[-3::-1]].T.values, names=al_clo.columns[-3::-1], ticks=np.round(al_clo.Return * 100, 2)) plt.xlabel("Return in %") plt.figure(figsize=(14, 3)) plt.title("Allocation calculated from adjusted close price") plot_stacked_bar(al_adj[al_adj.columns[-3::-1]].T.values, names=al_adj.columns[-3::-1], ticks=np.round(al_adj.Return * 100, 2)) plt.xlabel("Return in %") plt.figure(figsize=(14, 3)) plt.title("Price change (not adjusted)") for c in close.columns: plt.plot(close.index, close[c], '-', label=c) plt.hlines(xmin=close.index[0], xmax=close.index[-1], y=1, linestyles='--') plt.xlim(close.index[0], close.index[-1]) plt.legend(bbox_to_anchor=(1.01, 0.99), loc='upper left') if not (alloc is None): plt.plot(close.index, total, 'k-') plt.figure(figsize=(6, 5)) plot_correlation(close) plt.show()