def pe_trends_df(timeframe: Timeframe) -> pd.DataFrame:
    # we fetch all required fields for this view in one call to company_prices() - more efficient on the DB
    df = company_prices(None,
                        timeframe,
                        fields=["pe", "eps", "number_of_shares"],
                        missing_cb=None)
    return df

def make_kmeans_cluster_dataframe(timeframe: Timeframe, chosen_k: int,
                                  stocks: Iterable[str]) -> tuple:
    prices_df = company_prices(stocks, timeframe, fields="last_price")
    # annualised return and volatility per stock (252 trading days per year)
    s1 = prices_df.pct_change().mean() * 252
    s2 = prices_df.pct_change().std() * math.sqrt(252.0)
    data_df = pd.DataFrame.from_dict({"return": s1, "volatility": s2})
    data_df = data_df.dropna()  # calculation may produce inf/nan so purge now...
    data = np.asarray(
        [np.asarray(data_df["return"]), np.asarray(data_df["volatility"])]).T
    # compute the elbow curve: KMeans inertia for each candidate k
    distortion = []
    for k in range(2, 20):
        k_means = KMeans(n_clusters=k)
        k_means.fit(data)
        distortion.append(k_means.inertia_)
    # compute K-Means with the user-chosen number of clusters
    centroids, _ = kmeans(data, chosen_k)
    # assign each sample to a cluster
    idx, _ = vq(data, centroids)
    data_df["cluster_id"] = idx
    return distortion, chosen_k, centroids, idx, data_df

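# --- Illustrative sketch (standalone, not part of the app) ---
# A minimal, self-contained reproduction of the feature construction and
# elbow-method loop above, using synthetic prices. All names here are local to
# this example; only sklearn.cluster.KMeans is assumed, as used above.
import math
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans

rng = np.random.default_rng(0)
demo_prices = pd.DataFrame(
    100.0 * np.exp(np.cumsum(rng.normal(0.0, 0.02, size=(250, 12)), axis=0)),
    columns=[f"S{i}" for i in range(12)])  # 12 fake stocks, 250 trading days
features = pd.DataFrame({
    "return": demo_prices.pct_change().mean() * 252,
    "volatility": demo_prices.pct_change().std() * math.sqrt(252.0),
}).dropna()
inertia = [KMeans(n_clusters=k, n_init=10).fit(features.values).inertia_
           for k in range(2, 10)]
# the "elbow" in this curve suggests a reasonable chosen_k
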
def recalc_queryset(self, **kwargs):
    n_days = kwargs.get("n_days", 30)
    stocks_to_consider = filter_stocks_to_search(self.request,
                                                 kwargs.get("what_to_search"))
    period1 = kwargs.get("period1", 20)
    period2 = kwargs.get("period2", 200)
    assert period2 > period1
    matching_stocks = set()
    self.timeframe = Timeframe(past_n_days=n_days)
    df = company_prices(stocks_to_consider,
                        Timeframe(past_n_days=n_days + period2),
                        transpose=False)
    wanted_dates = set(self.timeframe.all_dates())
    for s in filter(lambda asx_code: asx_code in df.columns, stocks_to_consider):
        last_price = df[s]
        # we filter now because it is after the warm-up period for the MA200...
        ma20 = last_price.rolling(period1).mean().filter(items=wanted_dates,
                                                         axis=0)
        ma200 = (last_price.rolling(period2,
                                    min_periods=min([50, 3 * period1]))
                 .mean()
                 .filter(items=wanted_dates, axis=0))
        matching_dates = set(xo[1] for xo in calc_ma_crossover_points(ma20, ma200))
        if len(matching_dates.intersection(wanted_dates)) > 0:
            matching_stocks.add(s)
    return list(matching_stocks)

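# --- Illustrative sketch (standalone, not part of the app) ---
# calc_ma_crossover_points() is defined elsewhere in the app; the hypothetical
# equivalent below shows the idea: yield (value, date) pairs wherever the short
# moving average crosses the long one, matching how the tuples are indexed
# above (xo[1] == date). This is an assumption about its semantics, not its code.
import pandas as pd

def demo_ma_crossover_points(ma_short: pd.Series, ma_long: pd.Series):
    above = ma_short > ma_long
    # a crossover happens wherever the above/below state flips between dates
    flips = above.ne(above.shift()) & above.shift().notna()
    return [(ma_short[d], d) for d in ma_short.index[flips.values]]

short = pd.Series([1.0, 2.0, 3.0, 2.0], index=["d1", "d2", "d3", "d4"])
long_ = pd.Series([2.0, 2.0, 2.5, 2.5], index=["d1", "d2", "d3", "d4"])
# crosses above at d3, falls back below at d4
assert [d for _, d in demo_ma_crossover_points(short, long_)] == ["d3", "d4"]
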
def setup_optimisation_matrices(stocks, timeframe: Timeframe, exclude_price,
                                warning_cb):
    # ref: https://pyportfolioopt.readthedocs.io/en/latest/UserGuide.html#processing-historical-prices
    stock_prices = company_prices(stocks,
                                  timeframe,
                                  fields="last_price",
                                  missing_cb=None)
    stock_prices = stock_prices.fillna(method="bfill", limit=10, axis=0)
    latest_date = stock_prices.index[-1]
    earliest_date = stock_prices.index[0]
    stock_prices = remove_bad_stocks(stock_prices, earliest_date, exclude_price,
                                     warning_cb)
    stock_prices = remove_bad_stocks(stock_prices, latest_date, exclude_price,
                                     warning_cb)
    latest_prices = stock_prices.loc[latest_date]
    first_prices = stock_prices.loc[earliest_date]
    all_returns = returns_from_prices(stock_prices,
                                      log_returns=False).fillna(value=0.0)
    # sanity-check that the matrices are consistent with each other
    assert stock_prices.shape[1] == latest_prices.shape[0]
    assert stock_prices.shape[1] == all_returns.shape[1]
    assert all_returns.shape[0] == stock_prices.shape[0] - 1
    assert len(stock_prices.columns) > 0  # must have at least 1 stock
    assert len(stock_prices) > 7  # and at least one trading week of data
    return all_returns, stock_prices, latest_prices, first_prices

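# --- Illustrative sketch (standalone, not part of the app) ---
# Why the shape asserts above hold: pypfopt's returns_from_prices() is
# essentially a percentage change, so it drops the first row of the price
# matrix. Only the public pypfopt API is assumed; the data below is synthetic.
import numpy as np
import pandas as pd
from pypfopt.expected_returns import returns_from_prices

demo = pd.DataFrame(np.random.default_rng(1).uniform(1.0, 10.0, size=(10, 3)),
                    columns=["AAA", "BBB", "CCC"])
rets = returns_from_prices(demo, log_returns=False)
assert rets.shape == (demo.shape[0] - 1, demo.shape[1])  # one fewer row
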
def get_dataset(dataset_wanted, request, timeframe=None):
    assert (dataset_wanted in set(["market_sentiment", "eps-per-sector"])
            or dataset_wanted.startswith("kmeans-")
            or dataset_wanted.startswith("financial-metrics-")
            or dataset_wanted.startswith("stock-quotes-"))

    if timeframe is None:
        timeframe = Timeframe(past_n_days=300)

    if dataset_wanted == "market_sentiment":
        df = cached_all_stocks_cip(timeframe)
        return df
    elif dataset_wanted == "kmeans-watchlist":
        _, _, _, _, df = make_kmeans_cluster_dataframe(
            timeframe, 7, user_watchlist(request.user))
        return df
    elif dataset_wanted == "kmeans-etfs":
        _, _, _, _, df = make_kmeans_cluster_dataframe(timeframe, 7, all_etfs())
        return df
    elif dataset_wanted.startswith("stock-quotes-"):
        stock = dataset_wanted[len("stock-quotes-"):]
        validate_stock(stock)
        df = company_prices([stock],
                            timeframe=timeframe,
                            fields=all_stock_fundamental_fields,
                            missing_cb=None)
        df["stock_code"] = stock
        return df
    elif dataset_wanted.startswith("kmeans-sector-"):
        sector_id = int(dataset_wanted[len("kmeans-sector-"):])
        sector = Sector.objects.get(sector_id=sector_id)
        if sector is None or sector.sector_name is None:
            raise Http404("No stocks associated with sector")
        asx_codes = all_sector_stocks(sector.sector_name)
        _, _, _, _, df = make_kmeans_cluster_dataframe(timeframe, 7, asx_codes)
        return df
    elif dataset_wanted.startswith("financial-metrics-"):
        stock = dataset_wanted[len("financial-metrics-"):]
        validate_stock(stock)
        df = financial_metrics(stock)
        if df is not None:
            # Excel doesn't support timezones, so we remove them first
            df.columns = [d.strftime("%Y-%m-%d") for d in df.columns]
        # FALLTHRU: return df whether or not columns were reformatted
        return df
    elif dataset_wanted == "eps-per-sector":
        df = pe_trends_df(Timeframe(past_n_days=180))
        df = make_pe_trends_eps_df(df, stocks_by_sector())
        df = df.set_index("asx_code", drop=True)
        return df
    else:
        raise ValueError("Unsupported dataset {}".format(dataset_wanted))

def make_portfolio_performance_dataframe(
        stocks: Iterable[str], timeframe: Timeframe,
        purchases: Iterable[VirtualPurchase]) -> pd.DataFrame:

    def sum_portfolio(df: pd.DataFrame, date_str: str, stock_items):
        validate_date(date_str)
        portfolio_worth = sum(
            map(lambda t: df.at[t[0], date_str] * t[1], stock_items))
        return portfolio_worth

    df = company_prices(stocks, timeframe, transpose=True)
    rows = []
    stock_count = defaultdict(int)
    stock_cost = defaultdict(float)
    portfolio_cost = 0.0

    for d in [
            datetime.strptime(x, "%Y-%m-%d").date()
            for x in timeframe.all_dates()
    ]:
        d_str = str(d)
        if d_str not in df.columns:  # not a trading day?
            continue
        purchases_to_date = filter(lambda vp, d=d: vp.buy_date <= d, purchases)
        for purchase in purchases_to_date:
            if purchase.buy_date == d:
                portfolio_cost += purchase.amount
                stock_count[purchase.asx_code] += purchase.n
                stock_cost[purchase.asx_code] += purchase.amount
        portfolio_worth = sum_portfolio(df, d_str, stock_count.items())

        # emit rows for each stock and the aggregate portfolio
        for asx_code in stocks:
            cur_price = df.at[asx_code, d_str]
            if np.isnan(cur_price):  # price missing? ok, skip record
                continue
            assert cur_price is not None and cur_price >= 0.0
            stock_worth = cur_price * stock_count[asx_code]
            rows.append({
                "portfolio_cost": portfolio_cost,
                "portfolio_worth": portfolio_worth,
                "portfolio_profit": portfolio_worth - portfolio_cost,
                "stock_cost": stock_cost[asx_code],
                "stock_worth": stock_worth,
                "stock_profit": stock_worth - stock_cost[asx_code],
                "date": d_str,
                "stock": asx_code,
            })

    df = pd.DataFrame.from_records(rows)
    df["date"] = pd.to_datetime(df["date"], format="%Y-%m-%d")
    return df

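# --- Illustrative sketch (standalone, not part of the app) ---
# sum_portfolio() above values the holdings on one date as sum(price * units).
# A tiny worked example with synthetic data; all names are local to this sketch.
import pandas as pd

demo = pd.DataFrame({"2021-01-04": [1.50, 30.0]}, index=["ABC", "XYZ"])
holdings = {"ABC": 1000, "XYZ": 10}  # units held per stock
worth = sum(demo.at[code, "2021-01-04"] * n for code, n in holdings.items())
assert worth == 1.50 * 1000 + 30.0 * 10  # == 1800.0
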
def dataframe(ld: LazyDictionary) -> pd.DataFrame:
    # fetch extra history to warm up the MA200 function
    momentum_timeframe = Timeframe(past_n_days=n_days + 200)
    df = company_prices(
        (stock,),
        momentum_timeframe,
        fields=all_stock_fundamental_fields,
        missing_cb=None,
    )
    return df

def test_company_prices(
    quotation_fixture, monkeypatch
):  # pylint: disable=unused-argument,redefined-outer-name
    monkeypatch.setattr(mdl, "make_superdf", mock_superdf_all_stocks)

    # basic check
    required_timeframe = Timeframe(from_date="2021-01-01", n=6)
    df = company_prices(
        ["ABC", "OTHER"],
        required_timeframe,
        fields="last_price",
        missing_cb=None,
        transpose=True,
    )
    assert isinstance(df, pd.DataFrame)
    assert len(df) == 2
    assert list(df.index) == ["ABC", "OTHER"]
    assert list(df.columns) == [
        "2021-01-01",
        "2021-01-02",
        "2021-01-03",
        "2021-01-04",
        "2021-01-05",
        "2021-01-06",
    ]
    is_other_nan = list(np.isnan(df.loc["OTHER"]))
    assert is_other_nan == [False, True, True, True, True, True]

    # check impute-missing functionality
    df2 = company_prices(
        ["ABC", "OTHER"], required_timeframe, fields="last_price", transpose=True
    )
    assert list(df2.loc["OTHER"]) == [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]

    # finally check that a multi-field DataFrame is as requested
    monkeypatch.setattr(mdl, "make_superdf", mock_superdf_many_fields)

def show_fundamentals(request, stock=None, n_days=2 * 365):
    validate_user(request.user)
    validate_stock(stock)
    timeframe = Timeframe(past_n_days=n_days)
    df = company_prices(
        [stock],
        timeframe,
        fields=("eps", "volume", "last_price", "annual_dividend_yield",
                "pe", "change_in_percent", "change_price", "market_cap",
                "number_of_shares"),
        missing_cb=None,
    )
    # nicer to display the cumulative change than the daily change
    df["change_in_percent_cumulative"] = df["change_in_percent"].cumsum()
    df = df.drop("change_in_percent", axis=1)
    fundamentals_plot = plot_fundamentals(df, stock)
    context = {
        "asx_code": stock,
        "is_fundamentals": True,
        "fundamentals_plot": fundamentals_plot,
    }
    return render(request, "stock_fundamentals.html", context)

def show_pe_trends(request):
    """
    Display a plot of per-sector P/E trends across stocks in each sector
    ref: https://www.commsec.com.au/education/learn/choosing-investments/what-is-price-to-earnings-pe-ratio.html
    """
    validate_user(request.user)
    timeframe = Timeframe(past_n_days=180)
    pe_df = company_prices(None,
                           timeframe,
                           fields="pe",
                           missing_cb=None,
                           transpose=True)
    eps_df = company_prices(None,
                            timeframe,
                            fields="eps",
                            missing_cb=None,
                            transpose=True)
    ss = stocks_by_sector()
    ss_dict = {row.asx_code: row.sector_name for row in ss.itertuples()}
    eps_stocks = set(eps_df.index)
    n_stocks = len(pe_df)
    positive_pe_stocks = set(pe_df[pe_df.sum(axis=1) > 0.0].index)
    all_stocks = set(pe_df.index)
    n_non_zero_sum = len(positive_pe_stocks)
    records = []
    trading_dates = set(pe_df.columns)

    sector_counts_all_stocks = ss["sector_name"].value_counts()
    all_sectors = set(ss["sector_name"].unique())
    pe_pos_df = pe_df.filter(items=positive_pe_stocks,
                             axis=0).merge(ss,
                                           left_index=True,
                                           right_on="asx_code")
    assert len(pe_pos_df) <= len(positive_pe_stocks) and len(pe_pos_df) > 0
    market_avg_pe_df = pe_pos_df.mean(axis=0).to_frame(
        name="market_pe")  # average P/E by date (series over the timeframe)
    market_avg_pe_df["date"] = pd.to_datetime(market_avg_pe_df.index)
    breakdown_by_sector_pe_pos_stocks_only = pe_pos_df["sector_name"].value_counts()
    sector_counts_pe_pos_stocks_only = {
        s[0]: s[1]
        for s in breakdown_by_sector_pe_pos_stocks_only.items()
    }

    # restrict to trading dates to avoid KeyError during DataFrame.at[] calls below
    for ymd in filter(lambda d: d in trading_dates, timeframe.all_dates()):
        sum_pe_per_sector = defaultdict(float)
        sum_eps_per_sector = defaultdict(float)
        for stock in filter(lambda code: code in ss_dict, all_stocks):
            sector = ss_dict[stock]
            assert isinstance(sector, str)

            if stock in eps_stocks:
                eps = eps_df.at[stock, ymd]
                if isnan(eps):
                    continue
                sum_eps_per_sector[sector] += eps

            if stock in positive_pe_stocks:
                pe = pe_df.at[stock, ymd]
                if isnan(pe):
                    continue
                assert pe >= 0.0
                sum_pe_per_sector[sector] += pe

        assert len(sector_counts_pe_pos_stocks_only) == len(sum_pe_per_sector)
        assert len(sector_counts_all_stocks) == len(sum_eps_per_sector)
        for sector in all_sectors:
            pe_sum = sum_pe_per_sector.get(sector, None)
            n_pe = sector_counts_pe_pos_stocks_only.get(sector, None)
            pe_mean = pe_sum / n_pe if pe_sum is not None else None
            eps_sum = sum_eps_per_sector.get(sector, None)
            records.append({
                "date": ymd,
                "sector": sector,
                "mean_pe": pe_mean,
                "sum_pe": pe_sum,
                "sum_eps": eps_sum,
                "n_stocks": n_stocks,
                "n_sector_stocks_pe_only": n_pe,
            })
    df = pd.DataFrame.from_records(records)
    context = {
        "title": "PE Trends: {}".format(timeframe.description),
        "n_stocks": n_stocks,
        "timeframe": timeframe,
        "n_stocks_with_pe": n_non_zero_sum,
        "sector_pe_plot": plot_sector_field(df, field="mean_pe"),
        "sector_eps_plot": plot_sector_field(df, field="sum_eps"),
        "market_pe_plot": plot_series(market_avg_pe_df,
                                      x="date",
                                      y="market_pe",
                                      y_axis_label="Market-wide mean P/E",
                                      color=None,
                                      use_smooth_line=True),
    }
    return render(request, "pe_trends.html", context)

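# --- Illustrative sketch (standalone, not part of the app) ---
# The per-date, per-stock accumulation loops above are equivalent to a pandas
# groupby over sectors; a minimal demonstration with synthetic data. This is a
# swapped-in vectorised technique for illustration, not the app's own code.
import pandas as pd

demo_pe = pd.DataFrame(
    {"2021-01-04": [10.0, 20.0, 30.0], "2021-01-05": [11.0, 19.0, 31.0]},
    index=["AAA", "BBB", "CCC"])  # rows: stocks, columns: trading dates
sectors = pd.Series({"AAA": "Energy", "BBB": "Energy", "CCC": "Utilities"},
                    name="sector_name")
mean_pe_by_sector = demo_pe.groupby(sectors).mean()  # one row per sector
assert mean_pe_by_sector.at["Energy", "2021-01-04"] == 15.0
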
def optimise_portfolio(
    stocks,
    timeframe: Timeframe,
    algo="ef-minvol",
    max_stocks=80,
    total_portfolio_value=100 * 1000,
    exclude_price=None,
    warning_cb=None,
    **kwargs,
):
    assert len(stocks) >= 1
    assert timeframe is not None
    assert total_portfolio_value > 0
    assert max_stocks >= 5

    (
        all_returns,
        stock_prices,
        latest_prices,
        first_prices,
    ) = setup_optimisation_matrices(stocks, timeframe, exclude_price, warning_cb)
    market_prices = company_prices(("A200",),
                                   Timeframe(past_n_days=180),
                                   missing_cb=None)
    market_prices.index = pd.to_datetime(market_prices.index, format="%Y-%m-%d")
    market_prices = pd.Series(market_prices["A200"])
    quotes, ymd = valid_quotes_only("latest", ensure_date_has_data=True)

    for t in ((10, 0.0001), (20, 0.0005), (30, 0.001), (40, 0.005), (50, 0.01)):
        filtered_stocks, n_stocks = select_suitable_stocks(
            all_returns, stock_prices, max_stocks, *t)
        # since the sample of stocks might be different, we must recompute each iteration...
        filtered_stocks = filtered_stocks.sample(n=n_stocks, axis=1)
        market_caps = {
            q.asx_code: q.market_cap
            for q in quotes if q.asx_code in filtered_stocks.columns
        }
        # must start a new dict since each key is immutable after use
        ld = LazyDictionary()
        ld["n_stocks"] = n_stocks
        ld["filtered_stocks"] = filtered_stocks
        ld["market_prices"] = market_prices
        ld["market_caps"] = market_caps
        ld["total_portfolio_value"] = total_portfolio_value
        ld["returns_by"] = kwargs.get("returns_by", "by_prices")
        strategy, kwargs = assign_strategy(ld, algo)
        try:
            run_iteration(
                ld,
                strategy,
                first_prices,
                latest_prices,
                filtered_stocks,
                **kwargs,
            )
            # NB: we don't bother caching these plots since we must calculate so
            # many other values, but we need to serve them via cache_plot() anyway
            ld["efficient_frontier_plot"] = cache_plot(
                secrets.token_urlsafe(32), plot_random_portfolios, datasets=ld)
            ld["correlation_plot"] = lambda ld: cache_plot(
                secrets.token_urlsafe(32),
                lambda ld: plot_covariance(ld["m"], plot_correlation=True).figure,
                datasets=ld,
            )
            return ld
        except ValueError as ve:
            if warning_cb:
                warning_cb(
                    "Unable to optimise stocks with min_unique={} and var_min={}: n_stocks={} - {}"
                    .format(t[0], t[1], n_stocks, str(ve)))
            continue  # try the next, less strict, iteration

    print("*** WARNING: unable to optimise portfolio!")
    return LazyDictionary()

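# --- Illustrative sketch (standalone, not part of the app) ---
# A minimal sketch of what the "ef-minvol" algo presumably resolves to via
# assign_strategy(): PyPortfolioOpt's EfficientFrontier with a minimum-volatility
# objective. Only public pypfopt APIs are used; the price data is synthetic.
import numpy as np
import pandas as pd
from pypfopt import EfficientFrontier, expected_returns, risk_models

rng = np.random.default_rng(2)
prices = pd.DataFrame(
    100.0 * np.exp(np.cumsum(rng.normal(0.0005, 0.01, size=(250, 5)), axis=0)),
    columns=list("ABCDE"))  # 5 fake stocks, 250 trading days
mu = expected_returns.mean_historical_return(prices)
S = risk_models.sample_cov(prices)
ef = EfficientFrontier(mu, S)
ef.min_volatility()
weights = ef.clean_weights()  # dict of ticker -> portfolio weight
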
def show_purchase_performance(request):
    purchase_buy_dates = []
    purchases = []
    stocks = []
    for stock, purchases_for_stock in user_purchases(request.user).items():
        stocks.append(stock)
        for purchase in purchases_for_stock:
            purchase_buy_dates.append(purchase.buy_date)
            purchases.append(purchase)
    purchase_buy_dates = sorted(purchase_buy_dates)
    timeframe = Timeframe(from_date=str(purchase_buy_dates[0]),
                          to_date=all_available_dates()[-1])
    df = company_prices(stocks, timeframe, transpose=True)
    rows = []
    stock_count = defaultdict(int)
    stock_cost = defaultdict(float)
    portfolio_cost = 0.0

    for d in [
            datetime.strptime(x, "%Y-%m-%d").date()
            for x in timeframe.all_dates()
    ]:
        d_str = str(d)
        if d_str not in df.columns:  # not a trading day?
            continue
        purchases_to_date = filter(lambda vp, d=d: vp.buy_date <= d, purchases)
        for purchase in purchases_to_date:
            if purchase.buy_date == d:
                portfolio_cost += purchase.amount
                stock_count[purchase.asx_code] += purchase.n
                stock_cost[purchase.asx_code] += purchase.amount
        portfolio_worth = sum_portfolio(df, d_str, stock_count.items())

        # emit rows for each stock and the aggregate portfolio
        for asx_code in stocks:
            cur_price = df.at[asx_code, d_str]
            if np.isnan(cur_price):  # price missing? ok, skip record
                continue
            assert cur_price is not None and cur_price >= 0.0
            stock_worth = cur_price * stock_count[asx_code]
            rows.append({
                "portfolio_cost": portfolio_cost,
                "portfolio_worth": portfolio_worth,
                "portfolio_profit": portfolio_worth - portfolio_cost,
                "stock_cost": stock_cost[asx_code],
                "stock_worth": stock_worth,
                "stock_profit": stock_worth - stock_cost[asx_code],
                "date": d_str,
                "stock": asx_code,
            })

    t = plot_portfolio(pd.DataFrame.from_records(rows))
    portfolio_performance_figure, stock_performance_figure, profit_contributors_figure = t
    context = {
        "title": "Portfolio performance",
        "portfolio_title": "Overall",
        "portfolio_figure": portfolio_performance_figure,
        "stock_title": "Stock",
        "stock_figure": stock_performance_figure,
        "profit_contributors": profit_contributors_figure,
    }
    return render(request, "portfolio_trends.html", context=context)