def test_portfolio_moex_pass_0():
    d = d_pass_moex[0]
    data = download(**d)
    data = clean_data(data)
    pf = portfolio(data=data)
    pf.plot_portfolios()
    pf.print_results()
def test_portfolio_yfinance_pass_1():
    d = d_pass_yfinance[0]
    data = download(**d)
    data = clean_data(data)
    pf = portfolio(data=data, risk_free_rate=0.001, freq=252, num_portfolios=10000)
    pf.plot_portfolios()
    pf.print_results()
def __init__(self, ticker, data: pd.DataFrame, **kwargs):
    if not isinstance(data, pd.DataFrame):
        raise ValueError('data should be a pandas.DataFrame')
    if isinstance(data.columns, pd.MultiIndex):
        self._data = clean_data(data).dropna(how="all")
    else:
        self._data = data.dropna(how="all")
    if ticker not in self._data.columns:
        raise ValueError(f'Ticker {ticker} is not provided in DataFrame')
    self._ticker = ticker
    self._data = pd.DataFrame(self._data[ticker])
    self._risk_free_rate = kwargs.get('risk_free_rate', 0.001)
    self._freq = kwargs.get('freq', 252)
    self._type = kwargs.get('type', 'log')
    self._daily_returns = daily_log_returns(self._data)
    # ---------- properties ----------
    self._returns = mean_returns(self._data, freq=self._freq, type=self._type).values[0]
    self._volatility = volatility(self._data).values[0]
    self._downside_volatility = downside_volatility(self._data).values[0]
    self._sharp = (self._returns - self._risk_free_rate) / self._volatility
    self._sortino = (self._returns - self._risk_free_rate) / self._downside_volatility
    self._skew = self._data.skew().values[0]
    self._kurtosis = self._data.kurt().values[0]
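# --- Illustrative sketch (not part of the library) ---------------------------
# A minimal, self-contained example of how the Sharpe and Sortino ratios
# assigned above can be computed from a raw price series, assuming annualized
# log returns. The helper name and the downside-volatility definition are
# stand-ins for illustration, not the module's own mean_returns/volatility
# implementations.
import numpy as np
import pandas as pd

def sharpe_and_sortino(prices: pd.Series, risk_free_rate=0.001, freq=252):
    """Annualized Sharpe and Sortino ratios from a price series (illustrative only)."""
    log_ret = np.log(prices / prices.shift(1)).dropna()
    mean_ret = log_ret.mean() * freq                    # annualized mean log return
    vol = log_ret.std() * np.sqrt(freq)                 # annualized volatility
    downside = log_ret.clip(upper=0.0)                  # keep only the negative returns
    downside_vol = np.sqrt((downside ** 2).mean()) * np.sqrt(freq)
    sharpe = (mean_ret - risk_free_rate) / vol
    sortino = (mean_ret - risk_free_rate) / downside_vol
    return sharpe, sortino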
def test_portfolio_csv_pass_0():
    d = d_pass_csv[0]
    data = download(**d)
    data = clean_data(data)
    pf = portfolio(data=data, num_portfolios=10000)
    pf.plot_portfolios()
    pf.print_results()
def __init__(self, data: pd.DataFrame, risk_free_rate=0.001, freq=252):
    if not isinstance(freq, int):
        raise ValueError('Frequency must be an integer')
    elif freq <= 0:
        raise ValueError('Frequency must be > 0')
    else:
        self._freq = freq
    if not isinstance(risk_free_rate, (float, int)):
        raise ValueError('Risk free rate must be a float or an integer')
    else:
        self._risk_free_rate = risk_free_rate
    if not isinstance(data, pd.DataFrame):
        raise ValueError('data should be a pandas.DataFrame')
    if isinstance(data.columns, pd.MultiIndex):
        self._data = clean_data(data)
    else:
        self._data = data
    self._efficient_portfolios = None
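# --- Illustrative sketch (not part of the library) ---------------------------
# A minimal example of the kind of constrained optimization an efficient-
# frontier class like this typically wraps, using scipy.optimize.minimize.
# The function name, the annualization convention and the long-only constraint
# are assumptions for illustration, not the class's actual implementation.
import numpy as np
import pandas as pd
from scipy.optimize import minimize

def min_volatility_weights(data: pd.DataFrame, freq=252):
    """Long-only minimum-volatility weights for the assets in `data` (illustrative only)."""
    log_ret = np.log(data / data.shift(1)).dropna()
    cov = log_ret.cov() * freq                          # annualized covariance matrix
    n = len(data.columns)

    def port_vol(w):
        return np.sqrt(w @ cov.values @ w)

    constraints = ({'type': 'eq', 'fun': lambda w: np.sum(w) - 1.0},)  # weights sum to 1
    bounds = tuple((0.0, 1.0) for _ in range(n))                       # no short selling
    w0 = np.full(n, 1.0 / n)                                           # equal-weight start
    result = minimize(port_vol, w0, method='SLSQP', bounds=bounds, constraints=constraints)
    return pd.Series(result.x, index=data.columns)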
def __init__(self, data: pd.DataFrame, weights=None, risk_free_rate=0.0425, freq=252):
    if not isinstance(freq, int):
        raise ValueError('Frequency must be an integer')
    elif freq <= 0:
        raise ValueError('Frequency must be > 0')
    else:
        self._freq = freq
    if not isinstance(risk_free_rate, (float, int)):
        raise ValueError('Risk free rate must be a float or an integer')
    else:
        self._risk_free_rate = risk_free_rate
    if not isinstance(data, pd.DataFrame):
        raise ValueError('data should be a pandas.DataFrame')
    if isinstance(data.columns, pd.MultiIndex):
        self._data = clean_data(data)
    else:
        self._data = data
    self._portfolios = None
    self._min_vol_port = None
    self._min_downside_vol_port = None
    self._max_sharpe_port = None
    self._max_sortino_port = None
    self._df_results = None
    # --------------------------------
    if weights is None:
        # default to an equally weighted portfolio
        self._weights = np.array([1. / len(self._data.columns)] * len(self._data.columns))
    else:
        self._weights = np.array(weights)
    self._cvm = cov_matrix(self._data)
    self._mr = mean_returns(self._data, freq=self._freq)
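# --- Illustrative sketch (not part of the library) ---------------------------
# To make the roles of self._weights, cov_matrix and mean_returns concrete:
# a Monte Carlo search over random weight vectors typically scores each
# candidate portfolio by its expected return (w @ mu), volatility
# (sqrt(w @ cov @ w)) and the resulting Sharpe ratio. The function name and
# parameters below are hypothetical, not this class's API.
import numpy as np
import pandas as pd

def random_portfolios(mean_ret: pd.Series, cov: pd.DataFrame,
                      risk_free_rate=0.0425, num_portfolios=10000, seed=0):
    """Score random long-only weight vectors by return, volatility and Sharpe ratio."""
    rng = np.random.default_rng(seed)
    n = len(mean_ret)
    rows = []
    for _ in range(num_portfolios):
        w = rng.random(n)
        w /= w.sum()                                   # normalize so weights sum to 1
        ret = float(w @ mean_ret.values)               # expected portfolio return
        vol = float(np.sqrt(w @ cov.values @ w))       # portfolio volatility
        rows.append((ret, vol, (ret - risk_free_rate) / vol))
    return pd.DataFrame(rows, columns=['Return', 'Volatility', 'Sharpe'])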
tickers = ['CSCO', 'V', 'ABBV', 'SBUX', 'MCD', 'INTC',
           'GM', 'HPQ', 'EA', 'FDX', 'NKE', 'BERY',
           'GOOGL', 'GOOG', 'WMT', 'NVDA', 'TSLA', 'GE',
           'AAL', 'AMD', 'ADBE', 'AMZN', 'PYPL', 'MSFT',
           'ATVI', 'FB', 'NEM', 'NFLX', 'AVGO',
           'QCOM', 'BABA', 'MA', 'AAPL', 'BA', 'TWTR',
           'MU', 'T', 'F', 'BIDU', 'BIIB', 'XOM',
           'DIS', 'PFE', 'BMY']
data = download(source=Source.YFINANCE, tickers=tickers,
                start_date=start_date14_17, end_date=end_date14_17)
data = clean_data(data)
df_ratios = ratios(data=data)
df_sharp = df_ratios[df_ratios['Sharp Ratio'] > 1.0]
df_sortino = df_ratios[df_ratios['Sortino Ratio'] > 1.0]
pf_stocks_sortino = df_sortino.index.to_list()
pf_stocks_sharp = df_sharp.index.to_list()

print('=' * 80)
print('Sortino portfolio 14-17')
sortino_data14_17 = download(source=Source.YFINANCE, tickers=pf_stocks_sortino,
                             start_date=start_date14_17, end_date=end_date14_17)
sortino_data14_17 = clean_data(sortino_data14_17)
end_date = '2020-11-20'
tickers = ['CSCO', 'V', 'ABBV', 'SBUX', 'MCD', 'INTC',
           'GM', 'HPQ', 'EA', 'FDX', 'NKE', 'BERY',
           'GOOGL', 'GOOG', 'WMT', 'NVDA', 'TSLA', 'GE',
           'AAL', 'AMD', 'ADBE', 'AMZN', 'PYPL', 'MSFT',
           'ATVI', 'FB', 'NEM', 'NFLX', 'AVGO',
           'QCOM', 'BABA', 'MA', 'AAPL', 'BA', 'TWTR',
           'MU', 'T', 'F', 'BIDU', 'BIIB', 'XOM',
           'DIS', 'PFE', 'BMY']
data = download(source=Source.YFINANCE, tickers=tickers,
                start_date=start_date, end_date=end_date)
data = clean_data(data)
df_ratios = ratios(data=data)
df_sharp = df_ratios[df_ratios['Sharp Ratio'] > 1.0]
df_sortino = df_ratios[df_ratios['Sortino Ratio'] > 1.0]
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(df_sharp)
    print(df_sortino)
pf_stocks_sortino = df_sortino.index.to_list()
pf_stocks_sharp = df_sharp.index.to_list()
sortino_data = download(source=Source.YFINANCE, tickers=pf_stocks_sortino,
                        start_date=start_date, end_date=end_date)
def cluster_stocks(data: pd.DataFrame, n_clusters=5, verbose=False):
    """
    Takes the number of clusters and clusters the stocks (KMeans) based on
    their mean returns and volatility. The decision about the optimal number
    of clusters can be made with the elbow curve. The maximum number of
    clusters is 20.
    A good article about the elbow curve:
    https://blog.cambridgespark.com/how-to-determine-the-optimal-number-of-clusters-for-k-means-clustering-14f27070048f

    The function creates the following plots:
    1. Elbow curve to help decide on the optimal number of clusters.
    2. Stocks clustered by return and volatility (K-Means), with centroids.
    3. One plot per cluster with the cumulative sum of daily returns over the
       given period.

    :Input:
     :data: ``pandas.DataFrame`` stock prices
     :n_clusters: ``int`` (default: 5), must be >= 2 and <= the number of
         stocks in the portfolio
     :verbose: ``boolean`` (default: ``False``), whether to print out clusters

    :Output:
     :clusters: ``dict`` mapping cluster index to a ``list`` of (Stocks) tickers.
    """
    if not isinstance(n_clusters, int):
        raise ValueError('Total number of clusters must be an integer.')
    elif n_clusters < 2:
        raise ValueError(f'Total number of clusters({n_clusters}) must be >= 2.')
    elif len(data.columns) < 3:
        raise ValueError(f'Total number of stocks in portfolio({len(data.columns)}) must be > 2.')
    elif n_clusters > len(data.columns):
        raise ValueError(f'Total number of clusters({n_clusters}) '
                         f'must be <= number of stocks({len(data.columns)}) in portfolio')
    if isinstance(data.columns, pd.MultiIndex):
        data = clean_data(data)

    pf_return_means = mean_returns(data, type='log')
    pf_daily_returns = daily_log_returns(data)
    pf_volatility = volatility(data)

    # format the data as a numpy array to feed into the K-Means algorithm
    data_ret_vol = np.asarray([np.asarray(pf_return_means), np.asarray(pf_volatility)]).T

    distorsions = []
    max_n_clusters = min(20, len(data.columns))
    for k in range(2, max_n_clusters):
        k_means = KMeans(n_clusters=k)
        k_means.fit(X=data_ret_vol)
        distorsions.append(k_means.inertia_)
    plt.plot(
        range(2, max_n_clusters), distorsions,
        linestyle='-', color='red', lw=2, label='Elbow curve',
    )
    plt.title('Elbow curve')
    plt.xlabel('Number of clusters')
    plt.ylabel('Distortion')
    plt.grid(True)
    plt.legend()

    # Step size of the mesh. Decrease to increase the quality of the VQ.
    h = .002  # point in the mesh [x_min, x_max]x[y_min, y_max].
    x_min, x_max = data_ret_vol[:, 0].min() - 0.1, data_ret_vol[:, 0].max() + 0.1
    y_min, y_max = data_ret_vol[:, 1].min() - 0.1, data_ret_vol[:, 1].max() + 0.1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

    km = KMeans(n_clusters=n_clusters)
    km.fit(data_ret_vol)
    centroids = km.cluster_centers_

    # Obtain labels for each point in the mesh. Use the last trained model.
    Z = km.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

    # some plotting using numpy's logical indexing
    plt.figure(figsize=(10, 6))
    plt.imshow(Z, interpolation='nearest',
               extent=(xx.min(), xx.max(), yy.min(), yy.max()),
               cmap=plt.cm.Paired, aspect='auto', origin='lower')
    # Plot the centroids as a white star
    plt.scatter(centroids[:, 0], centroids[:, 1], marker='*', s=420, color='white', zorder=10)
    # Plot the stocks
    plt.plot(data_ret_vol[:, 0], data_ret_vol[:, 1], 'o', markersize=12)
    plt.title('K-means clustering\nCentroids are marked with a white star')
    plt.xlabel('Returns')
    plt.ylabel('Volatility')

    idx, _ = vq(data_ret_vol, centroids)
    clusters = {}
    for i in list(set(idx)):
        clusters[i] = []
    for name, cluster in zip(pf_return_means.index, idx):
        clusters[cluster].append(name)

    # Calculate the average cumulative daily return for each cluster and store
    # it in pf_daily_returns under a special stock name - avg{Cluster index}
    for i in list(set(idx)):
        s = 'avg' + str(i)
        pf_daily_returns[s] = pf_daily_returns[clusters[i]].mean(axis=1)

    for n in range(n_clusters):
        # plot the cluster
        plt.figure(figsize=(10, 6))
        for stock in clusters[n]:
            # plot the stocks as grey lines
            plt.plot(pf_daily_returns[stock].cumsum(), 'gray', linewidth=1)
        plt.title(f'Cluster #{n}')
        plt.ylabel('Daily returns cumulative sum')
        # plot the average to see the cluster dynamic
        s = 'avg' + str(n)
        plt.plot(pf_daily_returns[s].cumsum(), 'red', linewidth=3)
        plt.xticks(rotation=30)
        plt.grid(True)
        if verbose:
            print(f'Cluster #{n}')
            print(clusters[n])
    return clusters
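# --- Illustrative usage sketch (not part of the library) ---------------------
# A hypothetical call of cluster_stocks on synthetic random-walk prices, just
# to show the expected input shape and the returned mapping of cluster index
# to tickers. The ticker names and parameters are made up, and the call
# assumes cluster_stocks and its dependencies are importable.
import numpy as np
import pandas as pd

rng = np.random.default_rng(42)
dates = pd.date_range('2020-01-01', periods=250, freq='B')
fake_tickers = ['AAA', 'BBB', 'CCC', 'DDD', 'EEE', 'FFF']
prices = pd.DataFrame(
    100 * np.exp(np.cumsum(rng.normal(0.0005, 0.02, size=(len(dates), len(fake_tickers))), axis=0)),
    index=dates, columns=fake_tickers,
)

clusters = cluster_stocks(prices, n_clusters=3, verbose=True)
# `clusters` maps each cluster index to the list of tickers assigned to it.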
           'ATVI', 'FB', 'NEM', 'NFLX', 'AVGO',
           'QCOM', 'BABA', 'MA', 'AAPL', 'BA', 'TWTR',
           'MU', 'T', 'F', 'BIDU', 'BIIB', 'XOM',
           'DIS', 'PFE', 'BMY']

# tickers = ['BABA', 'MA', 'AAPL', 'BA', 'TWTR',
#            'MU', 'T', 'F', 'BIDU', 'BIIB', 'XOM',
#            'DIS', 'PFE', 'BMY']
# tickers = ['AKRN', 'PIKK', 'PLZL', 'SELG']

data = download(source=Source.YFINANCE, tickers=tickers, start_date=start_date, end_date=end_date)
data = clean_data(data)
# data.to_csv(path)

pf = portfolio(data=data, risk_free_rate=0.01, freq=252)
pf.mc()
pf.plot_portfolios()
pf.print_results()

ef = sharp_efficient_frontier(data=data)
msr = ef.max_sharpe_ratio()
mv = ef.min_volatility()

res = pf_valuation(msr['x'], data=data)
print(res)
print(np.round(msr['x'], decimals=4))
def test_cluster_stocks_pass_0():
    d = d_pass_csv[0]
    data = download(**d)
    data = clean_data(data)
    # drop empty rows and replace infinities before clustering
    data = data.dropna(how="all").replace([np.inf, -np.inf], np.nan)
    cluster_stocks(data, n_clusters=4, verbose=True)