def cached_portfolio_performance(user):
    """
    Produce the three portfolio plots for ``user`` through cache_plot().

    Returns a tuple of the cache_plot() results in the order
    (overall portfolio, per-stock performance, contributors). All three plots
    share one LazyDictionary whose "df" entry is built by data_factory(), so
    the portfolio dataframe is only created when ld["df"] is actually read.
    """
    assert isinstance(user, get_user_model())
    username = user.username

    def data_factory(ld: LazyDictionary):
        # flatten the per-stock purchases into a list of stocks and a list of purchases
        held_stocks = []
        all_purchases = []
        for stock, purchases_for_stock in user_purchases(user).items():
            held_stocks.append(stock)
            all_purchases.extend(purchases_for_stock)

        buy_dates = sorted(purchase.buy_date for purchase in all_purchases)
        # the plotted period starts at the earliest purchase and ends at the
        # last date returned by all_available_dates()
        timeframe = Timeframe(
            from_date=str(buy_dates[0]), to_date=all_available_dates()[-1]
        )
        return make_portfolio_performance_dataframe(
            held_stocks, timeframe, all_purchases
        )

    ld = LazyDictionary()
    ld["df"] = lambda ld: data_factory(ld)

    overall_uri = cache_plot(
        f"{username}-portfolio-performance", plot_overall_portfolio, datasets=ld
    )
    stock_uri = cache_plot(
        f"{username}-stock-performance",
        plot_portfolio_stock_performance,
        datasets=ld,
    )
    contributors_uri = cache_plot(
        f"{username}-contributor-performance",
        plot_portfolio_contributors,
        datasets=ld,
    )
    return (overall_uri, stock_uri, contributors_uri)
def show_trends(request):
    """
    Render "watchlist-rank.html" for the requesting user's watchlist.

    The context carries the trends computed by calculate_trends() over a
    300 day timeframe and the cache_plot() result for the company rank plot.
    """
    user = request.user
    validate_user(user)
    watched_stocks = user_watchlist(user)
    timeframe = Timeframe(past_n_days=300)

    def ranked_trending_stocks(ld):
        # only rank the stocks for which a trend was found
        trending_only = ld["cip_df"].filter(ld["trends"].keys(), axis="index")
        return rank_cumulative_change(trending_only, timeframe)

    ld = LazyDictionary()
    ld["cip_df"] = lambda ld: selected_cached_stocks_cip(watched_stocks, timeframe)
    ld["trends"] = lambda ld: calculate_trends(ld["cip_df"])
    ld["rank"] = lambda ld: ranked_trending_stocks(ld)

    plot_uri = cache_plot(
        f"{user.username}-watchlist-trends",
        lambda ld: plot_company_rank(ld),
        datasets=ld,
    )

    plot_title = "Trending watchlist stocks (ranked): {}".format(
        timeframe.description
    ) if False else None  # placeholder replaced below to keep evaluation order
    context = {
        "watchlist_trends": ld["trends"],
        "timeframe": timeframe,
        "trending_companies_uri": plot_uri,
        "trending_companies_plot_title": "Trending watchlist stocks (ranked): {}".format(
            timeframe.description
        ),
    }
    del plot_title
    return render(request, "watchlist-rank.html", context=context)
def show_total_earnings(request):
    """
    Render "total_earnings_by_sector.html": total earnings (eps * number of
    shares) summed per sector over the past 180 days, one facet per sector.
    """
    validate_user(request.user)

    def data_factory(df: pd.DataFrame) -> pd.DataFrame:
        # one row per (stock, date), one column per field
        wide = df.pivot(
            index=["asx_code", "fetch_date"],
            columns="field_name",
            values="field_value",
        )
        # ignore stocks which have unknowns
        has_positive_values = (wide["number_of_shares"] > 0) & (wide["eps"] > 0.0)
        wide = wide[has_positive_values]
        wide["total_earnings"] = wide["eps"] * wide["number_of_shares"]

        # one row per stock, one column per date
        earnings = (
            wide.dropna(how="any", axis=0)
            .reset_index()
            .pivot(index="asx_code", columns="fetch_date", values="total_earnings")
        )

        # attach the sector of each stock and total the earnings per sector
        by_sector = (
            earnings.merge(stocks_by_sector(), left_index=True, right_on="asx_code")
            .set_index("asx_code", drop=True)
            .groupby("sector_name")
            .sum()
        )
        by_sector["sector_name"] = by_sector.index

        long_df = by_sector.melt(id_vars="sector_name", var_name="fetch_date")
        assert set(long_df.columns) == {"sector_name", "fetch_date", "value"}
        long_df["fetch_date"] = pd.to_datetime(
            long_df["fetch_date"], format="%Y-%m-%d"
        )
        return long_df

    def make_plot(df: pd.DataFrame) -> p9.ggplot:
        aesthetics = p9.aes(x="fetch_date", y="value", color="sector_name")
        lines = p9.ggplot(df, aesthetics) + p9.geom_line(size=1.2)
        faceted = lines + p9.facet_wrap("~sector_name", ncol=2, scales="free_y")
        labelled = faceted + p9.scale_y_continuous(labels=label_shorten)
        return user_theme(
            labelled,
            y_axis_label="Total sector earnings ($AUD, positive contributions only)",
            figure_size=(12, 14),
            subplots_adjust={"wspace": 0.25},
        )

    ld = LazyDictionary()
    ld["timeframe"] = Timeframe(past_n_days=180)
    ld["pe_trends_df"] = lambda ld: pe_trends_df(ld["timeframe"])
    ld["df"] = lambda ld: data_factory(ld["pe_trends_df"])

    context = {
        "title": "Earnings per sector over time",
        "timeframe": ld["timeframe"],
        "plot_uri": cache_plot(
            f"total-earnings-by-sector:{ld['timeframe'].description}",
            lambda ld: make_plot(ld["df"]),
            datasets=ld,
        ),
    }
    return render(request, "total_earnings_by_sector.html", context=context)
def plot_multiple_metrics(
    self,
    country: str,
    topic: WorldBankTopic,
    indicators: Iterable[WorldBankIndicators],
) -> str:
    """
    Plot several world bank indicators for one country, one facet per indicator.

    Parameters:
        country: passed to fetch_data() as a single-element list.
        topic: not used by this method.
        indicators: the indicators to plot. Materialised into a list up front
            because it is iterated twice (cache key and plot factory); a
            one-shot iterable would otherwise be empty for the second pass.

    Returns whatever cache_plot() returns for the generated cache key
    (NOTE(review): other callers in this codebase concatenate that value with
    "/png/", so it is annotated as str here - confirm).

    Indicators whose data cannot be loaded are reported on stdout and skipped.
    """
    indicators = list(indicators)

    def make_plot(ld: LazyDictionary):
        # function-scope import so the block does not depend on a module-level alias
        import pandas as pd

        frames = []
        has_yearly = False
        add_points = False
        for i in indicators:
            try:
                df = fetch_data(
                    i, [country], fill_missing=lambda df: df.resample("AS").asfreq()
                )
                if df is None or len(df) == 0:
                    continue
            except Exception:  # data load failure: best effort, skip this indicator
                print(f"WARNING: unable to load worldbank dataset {i} - ignored")
                traceback.print_exc()
                continue

            df["dataset"] = f"{i.name} ({i.wb_id})"
            if "-yearly-" in i.tag:
                has_yearly = True
            pct_na = (df["metric"].isnull().sum() / len(df)) * 100.0
            # if any indicator is sparse, we enable points for all indicators
            # to be able to see them all
            if pct_na > 30.0 or df["metric"].count() <= 3:
                add_points = True
            frames.append(df)

        n_datasets = len(frames)
        # DataFrame.append() no longer exists in pandas >= 2.0; a single
        # concat over the collected frames yields the same rows.
        plot_df = pd.concat(frames) if frames else None
        figure_size = (12, n_datasets * 1.5)
        kwargs = {"group": "dataset", "colour": "dataset"}
        plot = worldbank_plot(
            plot_df,
            "",
            has_yearly,
            figure_size=figure_size,
            add_points=add_points,
            **kwargs,
        )
        plot += p9.facet_wrap("~dataset", ncol=1, scales="free_y")
        return user_theme(plot, figure_size=figure_size)

    indicator_id_str = "-".join([i.wb_id for i in indicators])
    return cache_plot(
        f"{country}-{indicator_id_str}-scmm-worldbank-plot",
        make_plot,
    )
def sector_performance(self, ld: LazyDictionary) -> str:
    """
    Return the cache_plot() result for the sector performance plot, or None
    when ld["sector_performance_df"] has no rows.

    The plot is requested with dont_cache=True.
    """
    sector = ld.get("sector")
    performance_df = ld["sector_performance_df"]
    if len(performance_df) >= 1:

        def make_plot(ld):
            return plot_sector_performance(ld["sector_performance_df"], sector)

        return cache_plot(
            f"{sector}-sector-performance",
            lambda ld: make_plot(ld),
            datasets=ld,
            dont_cache=True,
        )
    return None
def process_form(self, cleaned_data):
    """
    Build the template context for the bond price form.

    Reads "timeframe" (default 180 days) and "bond_name" from ``cleaned_data``,
    fetches the bond prices and returns a dict with the page title, the plot
    uri (prefixed with "/png/") and the plot title.
    """
    n_days = cleaned_data.get("timeframe", 180)
    timeframe = Timeframe(past_n_days=n_days)
    bond = cleaned_data.get("bond_name", None)
    prices = get_bond_prices(bond, timeframe)

    def make_bond_plot(ld):
        return self.make_plot(prices, timeframe)

    cache_key = f"{bond}-{timeframe.description}"
    plot_uri = cache_plot(cache_key, lambda ld: make_bond_plot(ld))

    result = {"title": "Visualise bond yields by country"}
    result["plot_uri"] = "/png/" + plot_uri
    result["plot_title"] = f"Bond prices: {bond} over {timeframe.description}"
    return result
def process_form(self, cleaned_data: dict) -> dict:
    """
    Build the template context for the cryptocurrency price form.

    ``cleaned_data["timeframe"]`` is required; "currency" defaults to "BTC".
    Returns a dict with the page title, the plot uri (prefixed with "/png/")
    and the plot title.
    """
    n_days = cleaned_data["timeframe"]
    timeframe = Timeframe(past_n_days=n_days)
    crypto_symbol = cleaned_data.get("currency", "BTC")
    prices = get_crypto_prices(crypto_symbol, timeframe)

    def make_crypto_plot(ld):
        return self.make_plot(prices, timeframe)

    cache_key = f"{crypto_symbol}-{timeframe.description}"
    plot_uri = cache_plot(cache_key, lambda ld: make_crypto_plot(ld))

    result = {"title": "Visualize cryptocurrency prices over time"}
    result["plot_uri"] = "/png/" + plot_uri
    result["plot_title"] = f"{crypto_symbol} over {timeframe.description}"
    return result
def plot_indicator(
    self,
    country: WorldBankCountry,
    topic: WorldBankTopic,
    indicator: WorldBankIndicators,
) -> str:
    """
    Plot a single world bank indicator for a single country.

    Parameters:
        country: its ``name`` is used for the data fetch and the cache key.
        topic: not used by this method.
        indicator: the indicator to fetch; ``wb_id`` goes into the cache key
            and ``name`` is passed to worldbank_plot().

    Returns the result of cache_plot() for the generated key.
    """

    def make_plot(_datasets=None):
        # The other plot factories handed to cache_plot() accept one argument
        # (the datasets dictionary). Accepting an optional, ignored argument
        # keeps this factory callable both with and without it.
        df = fetch_data(
            indicator,
            [country.name],
            fill_missing=lambda df: df.resample("AS").asfreq(),
        )
        return worldbank_plot(df, indicator.name, True)

    return cache_plot(
        f"{indicator.wb_id}-{country.name}-scsm-worldbank-plot", make_plot
    )
def process_form(self, cleaned_data):
    """
    Build the template context for the commodity price form.

    Reads "timeframe" (default 180 days) and "commodity" (default "Gold") from
    ``cleaned_data``, fetches the prices and returns a dict with the page
    title, the plot uri (prefixed with "/png/") and the plot title.
    """
    n_days = cleaned_data.get("timeframe", 180)
    timeframe = Timeframe(past_n_days=n_days)
    commodity_str = cleaned_data.get("commodity", "Gold")
    prices = get_commodity_prices(commodity_str, timeframe)

    def make_commodity_plot(ld):
        return self.make_plot(prices, timeframe)

    cache_key = f"{commodity_str}-{timeframe.description}"
    plot_uri = cache_plot(cache_key, lambda ld: make_commodity_plot(ld))

    result = {"title": "Visualize commodity prices over time"}
    result["plot_uri"] = "/png/" + plot_uri
    result["plot_title"] = (
        f"Commodity prices: {commodity_str} over {timeframe.description}"
    )
    return result
def form_valid(self, form):
    """
    Render the sector box plot page for the submitted form.

    Uses "sector", "normalisation_method" and "n_days" from the cleaned form
    data (with defaults) and adds the box plot uri plus the list of winning
    stocks to the template context.
    """
    cleaned = form.cleaned_data
    sector = cleaned.get("sector", "Communication Services")
    norm_method = cleaned.get("normalisation_method", None)
    n_days = cleaned.get("n_days", 30)

    ld = LazyDictionary()
    ld["stocks"] = lambda ld: all_sector_stocks(sector)
    ld["timeframe"] = Timeframe(past_n_days=n_days)
    ld["cip_df"] = lambda ld: selected_cached_stocks_cip(
        ld["stocks"], ld["timeframe"]
    )
    context = self.get_context_data()

    def winner_results(df: pd.DataFrame) -> list:
        # star performers: count, per stock, the days on which it was above
        # that day's mean
        wins = defaultdict(int)
        daily_mean = df.mean(axis=0)
        for day in df.columns:
            above_mean = df[df[day] > daily_mean[day]][day]
            for asx_code in above_mean.index:
                wins[asx_code] += 1

        results = []
        for asx_code, n_wins in wins.items():
            total = df.loc[asx_code].sum()
            # avoid "dead cat bounce" stocks which fall spectacularly and then
            # post major increases in percentage terms
            if not total > 0.0:
                continue
            results.append((asx_code, n_wins, total))

        # ascending stable sort followed by a reversal, largest total first
        results.sort(key=lambda t: t[2])
        results.reverse()
        return results

    page_state = {
        "title": "Past {} day sector performance: box plot trends".format(n_days),
        "n_days": n_days,
        "sector": sector,
        "plot_uri": cache_plot(
            f"{sector}-recent-sector-view-{ld['timeframe'].description}-{norm_method}",
            lambda ld: plot_boxplot_series(
                ld["cip_df"], normalisation_method=norm_method
            ),
            datasets=ld,
        ),
        "winning_stocks": winner_results(ld["cip_df"]),
    }
    context.update(page_state)
    return render(self.request, self.template_name, context)
def form_valid(self, form):
    """
    Filter the dataflow's dataframe by the submitted dimension values and
    render a point/line plot of whatever rows remain.

    Each cleaned form field is expected to be named "dimension_<column>"; the
    prefix length is stripped to obtain the dataframe column to filter on.
    A record of every filter step (column, value, rows before, rows after,
    unique values seen before filtering) is passed to the template as
    "filter_performance".

    Raises Http404 when the dataframe for the dataflow cannot be loaded.
    """
    df = fetch_dataframe(self.data_flow.name)
    if df is None or len(df) == 0:
        raise Http404(f"Unable to load dataframe: {self.data_flow}")

    filter_performance = []
    for k, v in form.cleaned_data.items():
        rows_at_start = len(df)
        print(f"Filtering rows for {k}: total {rows_at_start} rows at start")
        k = k[len("dimension_"):]
        # only collect the unique values when the frame is small enough
        if rows_at_start < 10000:
            unique_values_left = df[k].unique()
        else:
            unique_values_left = set()
        df = df[df[k] == v]
        rows_at_end = len(df)
        filter_performance.append(
            (k, v, rows_at_start, rows_at_end, unique_values_left)
        )
        print(f"After filtering: now {rows_at_end} rows")
        if len(df) == 0:
            warning(self.request, f"No rows of data left after filtering: {k} {v}")
            break

    plot = None
    plot_title = ""
    if len(df) > 0:
        plot_title, x_axis_column, y_axis_column, df = detect_dataframe(
            df, self.data_flow
        )
        plot = (
            p9.ggplot(df, p9.aes(x=x_axis_column, y=y_axis_column))
            + p9.geom_point()
            + p9.geom_line()
        )
        plot = user_theme(plot)

    context = self.get_context_data()
    # The key must identify the dataflow and which dimension each value belongs
    # to, otherwise two different selections can share one cached plot.
    # Formatting each pair as text also copes with non-string form values.
    selection = "-".join(
        sorted(f"{name}={value}" for name, value in form.cleaned_data.items())
    )
    cache_key = f"{self.data_flow.name}-{selection}-ecb-plot"
    context.update(
        {
            "dataflow": self.data_flow,
            "dataflow_name": self.data_flow.name,
            "filter_performance": filter_performance,
            "plot_title": plot_title,
            # the factory tolerates being called with or without a datasets argument
            "plot_uri": cache_plot(cache_key, lambda _datasets=None: plot),
        }
    )
    return render(self.request, self.template_name, context)
def form_valid(self, form):
    """
    Render the plot for the dataflow selected in the form.

    The dataflow key doubles as plot title and cache key prefix.
    self.fixed_datapoints is reset to an empty set before plotting and the
    same set object is exposed to the template.
    """
    context = self.get_context_data()
    key = form.cleaned_data["dataflow"]
    data_df = data(key)
    self.fixed_datapoints = set()  # required to perform de-dupe

    def make_plot(ld):
        return self.plot_abs_dataframe(data_df)

    plot_state = {
        "plot_title": key,
        "plot_fixed_datapoints": self.fixed_datapoints,
    }
    plot_state["plot_uri"] = cache_plot(f"{key}-abs-plot", lambda ld: make_plot(ld))
    context.update(plot_state)
    return render(self.request, template_name=self.template_name, context=context)
def plot_sector_monthly_mean_returns(ld: LazyDictionary) -> dict:
    """
    Plot the mean monthly return of all stocks and of each sector as bars,
    one facet per dataset.

    Reads ld["monthly_returns_by_stock"] (one column per stock, indexed by
    date) and ld["stocks_by_sector"] (columns "sector_name" and "asx_code").

    Returns a dict with the single key "month-by-month-average-returns" whose
    value is the cache_plot() result.
    """
    all_stocks = ld["monthly_returns_by_stock"]
    ss = ld["stocks_by_sector"]

    market_average_df = all_stocks.mean(axis=1).to_frame(name="average")
    market_average_df["dataset"] = "All stock average"

    # Collect one frame per dataset and concatenate once: DataFrame.append()
    # no longer exists in pandas >= 2.0 and re-copied the frame on every call.
    frames = [market_average_df]
    for current_sector in ss["sector_name"].unique():
        wanted_stocks = set(ss[ss["sector_name"] == current_sector]["asx_code"])
        # drop stocks and dates which have no data at all for this sector
        filtered_stocks = (
            all_stocks.filter(items=wanted_stocks, axis="columns")
            .dropna(axis="columns", how="all")
            .dropna(axis="rows", how="all")
        )
        sector_df = filtered_stocks.mean(axis=1).to_frame(name="average")
        sector_df["dataset"] = current_sector
        frames.append(sector_df)

    final_df = pd.concat(frames)
    final_df["date"] = pd.to_datetime(final_df.index, format="%Y-%m-%d")

    plot = (
        p9.ggplot(final_df, p9.aes(x="date", y="average", fill="average"))
        + p9.geom_bar(stat="identity")
        + p9.facet_wrap("~dataset", ncol=2, scales="free_y")
    )
    return {
        "month-by-month-average-returns": cache_plot(
            "monthly-mean-returns",
            lambda ld: user_theme(
                plot,
                y_axis_label="Average percent return per month",
                figure_size=(12, 10),
                subplots_adjust={"wspace": 0.15},
                axis_text_x=p9.element_text(angle=30, size=7),
                asxtrade_want_fill_continuous=True,
            ),
        )
    }
def plot_country_comparison(
    self,
    countries: Iterable[str],
    topic: WorldBankTopic,
    indicator: WorldBankIndicators,
) -> str:
    """
    Plot one world bank indicator for one or more countries on a single plot.

    Parameters:
        countries: the countries to compare. Materialised into a list up
            front because the code needs len() and iterates it several times,
            which a plain iterable (for example a generator) does not support.
        topic: not used by this method.
        indicator: the indicator to fetch; ``wb_id`` goes into the cache key.

    Returns the result of cache_plot() for the generated key
    (NOTE(review): annotated as str like plot_indicator - confirm).
    """
    countries = list(countries)

    def fix_gaps(df: pd.DataFrame) -> pd.DataFrame:
        """
        If say the plot timeframe is between 1960 and 2020 but there
        are missing rows where some years used to be, this method will
        re-introduce new rows into the dataframe and set the country column
        to the specified country. It must only be called when
        len(countries) == 1 or a pandas error will occur
        """
        df = df.resample("AS").asfreq()
        # NB: this is only correct when len(countries) == 1
        df["country"] = countries[0]
        return df

    def make_plot(ld: LazyDictionary):
        # not resampling to fill gaps at this time, unless only one country
        # is being plotted: TODO BUG FIXME
        resample_lambda = fix_gaps if len(countries) == 1 else None
        df = fetch_data(indicator, countries, fill_missing=resample_lambda)
        kwargs = {"group": "country", "colour": "country"}
        plot = worldbank_plot(df, indicator.name, True, **kwargs)
        if len(countries) > 1:
            plot += p9.theme(legend_position="right")
        return plot

    countries_str = "-".join(countries)
    return cache_plot(
        f"{indicator.wb_id}-{countries_str}-scm-worldbank-plot",
        make_plot,
    )
def show_pe_trends(request):
    """
    Display a plot of per-sector PE trends across stocks in each sector
    ref: https://www.commsec.com.au/education/learn/choosing-investments/what-is-price-to-earnings-pe-ratio.html

    Renders "pe_trends.html" with four cache_plot() results (market-wide mean
    P/E, per-sector mean P/E, per-sector summed EPS, top EPS contributors)
    plus stock counts, all computed over a 180 day timeframe. The datasets
    are registered in a LazyDictionary as callables.
    """
    validate_user(request.user)

    def make_pe_trends_market_avg_df(ld: LazyDictionary) -> pd.DataFrame:
        """Mean P/E per date over the positive-P/E stocks, with a "date" column."""
        df = ld["data_df"]
        ss = ld["stocks_by_sector"]
        pe_pos_df, _ = make_pe_trends_positive_pe_df(df, ss)
        market_avg_pe_df = pe_pos_df.mean(axis=0, numeric_only=True).to_frame(
            name="market_pe")  # avg P/E by date series
        market_avg_pe_df["date"] = pd.to_datetime(market_avg_pe_df.index)
        return market_avg_pe_df

    def sector_eps_data_factory(ld: LazyDictionary) -> pd.DataFrame:
        """
        Build one record per (date, sector) holding the mean and sum of P/E
        and the sum of EPS, accumulated over the positive-P/E stocks which
        have a known sector.
        """
        df = ld["data_df"]
        n_stocks = df["asx_code"].nunique()
        pe_df, positive_pe_stocks = ld["positive_pe_tuple"]
        eps_df = ld["eps_df"]
        ss = ld["stocks_by_sector"]
        eps_stocks = set(eps_df.index)
        # stock code -> sector name lookup
        ss_dict = {row.asx_code: row.sector_name for row in ss.itertuples()}
        # every column of pe_df except "sector_name" is treated as a trading date
        trading_dates = set(pe_df.columns)
        trading_dates.remove("sector_name")
        sector_counts_all_stocks = ss["sector_name"].value_counts()
        all_sectors = set(ss["sector_name"].unique())
        breakdown_by_sector_pe_pos_stocks_only = pe_df[
            "sector_name"].value_counts()
        # sector name -> number of positive-P/E stocks in that sector
        sector_counts_pe_pos_stocks_only = {
            s[0]: s[1]
            for s in breakdown_by_sector_pe_pos_stocks_only.items()
        }
        records = []
        for ymd in filter(
                lambda d: d in trading_dates, ld["timeframe"].all_dates()
        ):  # needed to avoid KeyError raised during DataFrame.at[] calls below
            sum_pe_per_sector = defaultdict(float)
            sum_eps_per_sector = defaultdict(float)
            for stock in filter(lambda code: code in ss_dict,
                                positive_pe_stocks):
                sector = ss_dict[stock]
                assert isinstance(sector, str)
                if stock in eps_stocks:
                    eps = eps_df.at[stock, ymd]
                    # NB: a missing EPS skips the stock entirely for this
                    # date, so its P/E is not accumulated either
                    if isnan(eps):
                        continue
                    sum_eps_per_sector[sector] += eps
                # NOTE(review): always true here since the loop iterates
                # over positive_pe_stocks
                if stock in positive_pe_stocks:
                    pe = pe_df.at[stock, ymd]
                    if isnan(pe):
                        continue
                    assert pe >= 0.0
                    sum_pe_per_sector[sector] += pe
            # sanity checks: cannot have accumulated more sectors than exist
            assert len(sector_counts_pe_pos_stocks_only) >= len(
                sum_pe_per_sector)
            assert len(sector_counts_all_stocks) >= len(sum_eps_per_sector)
            for sector in all_sectors:
                # sectors without any accumulated value are recorded as None
                pe_sum = sum_pe_per_sector.get(sector, None)
                n_pe = sector_counts_pe_pos_stocks_only.get(sector, None)
                # the mean divides by the sector's count of positive-P/E
                # stocks, not by the number of stocks summed on this date
                pe_mean = pe_sum / n_pe if pe_sum is not None else None
                eps_sum = sum_eps_per_sector.get(sector, None)
                records.append({
                    "date": ymd,
                    "sector": sector,
                    "mean_pe": pe_mean,
                    "sum_pe": pe_sum,
                    "sum_eps": eps_sum,
                    "n_stocks": n_stocks,
                    "n_sector_stocks_pe_only": n_pe,
                })
        df = pd.DataFrame.from_records(records)
        return df

    ld = LazyDictionary()
    ld["data_df"] = lambda ld: pe_trends_df(ld["timeframe"])
    ld["positive_pe_tuple"] = lambda ld: make_pe_trends_positive_pe_df(
        ld["data_df"], ld["stocks_by_sector"])
    ld["market_avg_pe_df"] = lambda ld: make_pe_trends_market_avg_df(ld)
    ld["eps_df"] = lambda ld: make_pe_trends_eps_df(ld["data_df"])
    ld["sector_eps_df"] = lambda ld: sector_eps_data_factory(ld)
    # these two are stored as values rather than as callables
    ld["stocks_by_sector"] = stocks_by_sector()
    ld["timeframe"] = Timeframe(past_n_days=180)
    td = ld["timeframe"].description
    # these arent per-user plots: they can safely be shared across all users of the site, so the key reflects that
    sector_pe_cache_key = f"{td}-by-sector-pe-plot"
    sector_eps_cache_key = f"{td}-by-sector-eps-plot"
    market_pe_cache_key = f"{td}-market-pe-mean"
    market_pe_plot_uri = cache_plot(
        market_pe_cache_key,
        lambda ld: plot_series(
            ld["market_avg_pe_df"],
            x="date",
            y="market_pe",
            y_axis_label="Market-wide mean P/E",
            color=None,
            use_smooth_line=True,
        ),
        datasets=ld,
    )
    context = {
        "title": "PE Trends",
        # reading these entries here evaluates "data_df" and
        # "positive_pe_tuple" while the context is being built
        "n_stocks": ld["data_df"]["asx_code"].nunique(),
        "timeframe": ld["timeframe"],
        "n_stocks_with_pe": len(ld["positive_pe_tuple"][1]),
        "sector_pe_plot_uri": cache_plot(
            sector_pe_cache_key,
            lambda ld: plot_sector_field(ld["sector_eps_df"], field="mean_pe"),
            datasets=ld,
        ),
        "sector_eps_plot_uri": cache_plot(
            sector_eps_cache_key,
            lambda ld: plot_sector_field(ld["sector_eps_df"], field="sum_eps"),
            datasets=ld,
        ),
        "market_pe_plot_uri": market_pe_plot_uri,
        "sector_positive_top_contributors_eps_uri": cache_plot(
            f"top-contributors-{sector_eps_cache_key}",
            lambda ld: plot_sector_top_eps_contributors(
                ld["eps_df"], ld["stocks_by_sector"]),
            datasets=ld,
        ),
    }
    return render(request, "pe_trends.html", context)
def show_companies(
    matching_companies,  # may be QuerySet or iterable of stock codes (str)
    request,
    sentiment_timeframe: Timeframe,
    extra_context=None,
    template_name="all_stocks.html",
):
    """
    Support function to public-facing views to eliminate code redundancy

    Parameters:
        matching_companies: a QuerySet (used as-is), None or a non-empty
            collection of stock codes (both resolved via latest_quote()).
            An empty collection renders the template with a warning only.
        request: the current request; "sort_by" and "page" are read from
            request.GET.
        sentiment_timeframe: timeframe used for the plots and their cache keys.
        extra_context: optional dict merged into the template context last, so
            it overrides the defaults (e.g. "title"). It may also carry
            "n_top_bottom" (default 20). None is accepted.
        template_name: template to render.

    Returns the rendered response.
    """
    if isinstance(matching_companies, QuerySet):
        # we assume QuerySet is already sorted by desired criteria
        stocks_queryset = matching_companies
    elif matching_companies is None or len(matching_companies) > 0:
        stocks_queryset, _ = latest_quote(matching_companies)
        # FALLTHRU
    else:
        # no companies to report?
        warning(request, "No matching companies.")
        return render(
            request, template_name, context={"timeframe": sentiment_timeframe}
        )

    # prune companies without a latest price, makes no sense to report them
    stocks_queryset = stocks_queryset.exclude(last_price__isnull=True)

    # sort queryset as this will often be requested by the USER
    arg = request.GET.get("sort_by", "asx_code")
    if arg == "sector" or arg == "sector,-eps":
        # sector is not a queryset field here, so these two orderings are done
        # in python and turn the queryset into a list
        ss = {
            s["asx_code"]: s["sector_name"]
            for s in stocks_by_sector().to_dict("records")
        }
        if arg == "sector":
            stocks_queryset = sorted(
                stocks_queryset, key=lambda s: ss.get(s.asx_code, "Z")
            )  # companies without sector sort last
        else:
            eps_dict = {
                s.asx_code: s.eps if s.eps is not None else 0.0
                for s in stocks_queryset
            }
            stocks_queryset = sorted(
                stocks_queryset,
                key=lambda s: (
                    ss.get(s.asx_code, "Z"),
                    -eps_dict.get(s.asx_code, 0.0),
                ),
            )
    else:
        sort_by = tuple(arg.split(","))
        stocks_queryset = stocks_queryset.order_by(*sort_by)

    # keep track of stock codes for template convenience
    asx_codes = [quote.asx_code for quote in stocks_queryset]
    # extra_context defaults to None, so it cannot be searched directly
    n_top_bottom = (extra_context or {}).get("n_top_bottom", 20)
    print("show_companies: found {} stocks".format(len(asx_codes)))

    # setup context dict for the render
    context = {
        # NB: title and heatmap_title are expected to be supplied by caller via extra_context
        "timeframe": sentiment_timeframe,
        "title": "Caller must override",
        "watched": user_watchlist(request.user),
        "n_stocks": len(asx_codes),
        "n_top_bottom": n_top_bottom,
        "virtual_purchases": user_purchases(request.user),
    }

    # since we sort above, we must setup the pagination also...
    paginator = Paginator(stocks_queryset, 50)
    page_number = request.GET.get("page", 1)
    page_obj = paginator.page(page_number)
    context["page_obj"] = page_obj
    context["object_list"] = paginator

    # compute totals across all dates for the specified companies to look at
    # top10/bottom10 in the timeframe
    ld = LazyDictionary()
    ld["cip_df"] = lambda ld: selected_cached_stocks_cip(
        asx_codes, sentiment_timeframe
    )
    ld["sum_by_company"] = lambda ld: ld["cip_df"].sum(axis=1, numeric_only=True)
    ld["top10"] = lambda ld: ld["sum_by_company"].nlargest(n_top_bottom)
    ld["bottom10"] = lambda ld: ld["sum_by_company"].nsmallest(n_top_bottom)
    ld["stocks_by_sector"] = lambda ld: stocks_by_sector()

    if len(asx_codes) <= 0 or len(ld["top10"]) <= 0:
        warning(request, "No matching companies found.")
    else:
        sorted_codes = "-".join(sorted(asx_codes))
        sentiment_heatmap_uri = cache_plot(
            f"{sorted_codes}-{sentiment_timeframe.description}-stocks-sentiment-plot",
            lambda ld: plot_heatmap(sentiment_timeframe, ld),
            datasets=ld,
        )

        key = f"{sorted_codes}-{sentiment_timeframe.description}-breakdown-plot"
        sector_breakdown_uri = cache_plot(key, plot_breakdown, datasets=ld)

        top10_plot_uri = cache_plot(
            f"top10-plot-{'-'.join(ld['top10'].index)}",
            lambda ld: plot_cumulative_returns(ld["top10"].index, ld),
            datasets=ld,
        )
        bottom10_plot_uri = cache_plot(
            f"bottom10-plot-{'-'.join(ld['bottom10'].index)}",
            lambda ld: plot_cumulative_returns(ld["bottom10"].index, ld),
            datasets=ld,
        )

        context.update(
            {
                "best_ten": ld["top10"],
                "worst_ten": ld["bottom10"],
                "sentiment_heatmap_uri": sentiment_heatmap_uri,
                "sentiment_heatmap_title": "{}: {}".format(
                    context["title"], sentiment_timeframe.description
                ),
                "sector_breakdown_uri": sector_breakdown_uri,
                "top10_plot_uri": top10_plot_uri,
                "bottom10_plot_uri": bottom10_plot_uri,
                "timeframe_end_performance": timeframe_end_performance(ld),
            }
        )

    # caller supplied values win over everything computed above
    if extra_context:
        context.update(extra_context)
    add_messages(request, context)
    return render(request, template_name, context=context)
def cluster_stocks_view(request, stocks: str):
    """
    Render k-means clustering plots (elbow curve and clusters) for a set of stocks.

    ref: https://pythonforfinance.net/2018/02/08/stock-clusters-using-k-means-algorithm-in-python/

    Parameters:
        request: the current request; the username is part of the cache keys.
        stocks: which stocks to cluster - "watchlist", "etfs" or
            "sector-<sector id>".

    Raises Http404 when ``stocks`` is not one of the supported forms, when the
    sector id is not an integer, or when no usable sector exists for the id.
    """
    validate_user(request.user)
    timeframe = Timeframe(past_n_days=300)
    if stocks == "watchlist":
        asx_codes = user_watchlist(request.user)
    elif stocks == "etfs":
        asx_codes = all_etfs()
    elif stocks.startswith("sector-"):
        # A malformed id makes int() raise ValueError and an unknown id makes
        # the model manager's get() raise Sector.DoesNotExist: both are
        # reported as a 404 rather than escaping as a server error.
        try:
            sector_id = int(stocks[7:])
            sector = Sector.objects.get(sector_id=sector_id)
        except (ValueError, Sector.DoesNotExist):
            raise Http404("No stocks associated with sector")
        if sector is None or sector.sector_name is None:
            raise Http404("No stocks associated with sector")
        asx_codes = all_sector_stocks(sector.sector_name)
    else:
        raise Http404("Unknown stock list {}".format(stocks))

    chosen_k = 7  # often a reasonable tradeoff

    def elbow_curve_plot(ld: LazyDictionary):
        # only the first element of the result (distortion) is plotted here
        distortion, _, _, _, _ = make_kmeans_cluster_dataframe(
            timeframe, chosen_k, asx_codes
        )
        fig = plt.figure(figsize=(15, 5))
        plt.plot(range(2, 20), distortion)
        plt.grid(True)
        plt.title("Elbow curve")
        return fig

    def cluster_plot(ld: LazyDictionary):
        # only the last element of the result (the per-stock dataframe) is plotted
        _, _, _, _, data_df = make_kmeans_cluster_dataframe(
            timeframe, chosen_k, asx_codes
        )
        plot = (
            p9.ggplot(
                data_df, p9.aes("return", "volatility", colour="factor(cluster_id)")
            )
            + p9.geom_point(size=3)
            + p9.facet_wrap("~cluster_id", ncol=3, scales="free")
        )
        return user_theme(
            plot,
            x_axis_label="Returns (%)",
            y_axis_label="Volatility (%)",
            figure_size=(15, 15),
            subplots_adjust={"hspace": 0.15, "wspace": 0.15},
        )

    stocks_as_str = "-".join(sorted(asx_codes))
    elbow_curve_uri = cache_plot(
        f"{request.user.username}-cluster-{stocks_as_str}-elbow-curve-plot",
        elbow_curve_plot,
    )
    cluster_uri = cache_plot(
        f"{request.user.username}-cluster-{stocks_as_str}-kmeans-cluster-plot",
        cluster_plot,
    )
    context = {
        "elbow_curve_plot_uri": elbow_curve_uri,
        "k": chosen_k,
        "dataset": stocks,
        "n_stocks": len(asx_codes),
        "cluster_plot_uri": cluster_uri,
        "timeframe": timeframe,
    }
    return render(request, "cluster_stocks.html", context=context)
def optimise_portfolio(
    stocks,
    timeframe: Timeframe,
    algo="ef-minvol",
    max_stocks=80,
    total_portfolio_value=100 * 1000,
    exclude_price=None,
    warning_cb=None,
    **kwargs,
):
    """
    Optimise a portfolio over ``stocks`` and return a LazyDictionary holding
    the inputs and results of the run.

    Parameters:
        stocks: the candidate stocks; at least one is required.
        timeframe: period handed to setup_optimisation_matrices().
        algo: strategy name handed to assign_strategy().
        max_stocks: upper limit handed to select_suitable_stocks(); must be >= 5.
        total_portfolio_value: stored in the result under "total_portfolio_value".
        exclude_price: passed through to setup_optimisation_matrices().
        warning_cb: optional callable receiving a message when an iteration
            fails with ValueError.
        kwargs: "returns_by" is read from here (default "by_prices").

    The candidate filter thresholds are taken from a fixed list of five
    (min_unique, var_min) pairs, starting with the smallest values.

    NOTE(review): the handler below re-raises the ValueError, which conflicts
    with its "try next iteration" comment - as written the loop cannot reach
    a second iteration nor the final warning/empty result. Confirm which
    behaviour is intended.
    """
    assert len(stocks) >= 1
    assert timeframe is not None
    assert total_portfolio_value > 0
    assert max_stocks >= 5
    (
        all_returns,
        stock_prices,
        latest_prices,
        first_prices,
    ) = setup_optimisation_matrices(stocks, timeframe, exclude_price, warning_cb)

    # "A200" prices over the past 180 days are stored as "market_prices"
    market_prices = company_prices(("A200", ),
                                   Timeframe(past_n_days=180),
                                   missing_cb=None)
    market_prices.index = pd.to_datetime(market_prices.index,
                                         format="%Y-%m-%d")
    market_prices = pd.Series(market_prices["A200"])
    quotes, ymd = valid_quotes_only("latest", ensure_date_has_data=True)

    for t in ((10, 0.0001), (20, 0.0005), (30, 0.001), (40, 0.005), (50,
                                                                     0.01)):
        filtered_stocks, n_stocks = select_suitable_stocks(
            all_returns, stock_prices, max_stocks, *t)
        # since the sample of stocks might be different, we must recompute each iteration...
        filtered_stocks = filtered_stocks.sample(n=n_stocks, axis=1)
        market_caps = {
            q.asx_code: q.market_cap
            for q in quotes if q.asx_code in filtered_stocks.columns
        }

        ld = (LazyDictionary()
              )  # must start a new dict since each key is immutable after use
        ld["n_stocks"] = n_stocks
        ld["filtered_stocks"] = filtered_stocks
        ld["market_prices"] = market_prices
        ld["market_caps"] = market_caps
        ld["total_portfolio_value"] = total_portfolio_value
        ld["returns_by"] = kwargs.get("returns_by", "by_prices")

        # NB: this rebinds kwargs to whatever assign_strategy() returns
        strategy, kwargs = assign_strategy(ld, algo)
        try:
            run_iteration(
                ld,
                strategy,
                first_prices,
                latest_prices,
                filtered_stocks,
                **kwargs,
            )

            # NB: we dont bother caching these plots since we must calculate so many other values but we need to serve them via cache_plot() anyway
            # a random token is used as the key for both plots; the first
            # plot is requested immediately, the second only when
            # ld["correlation_plot"] is read
            ld["efficient_frontier_plot"] = cache_plot(
                secrets.token_urlsafe(32),
                plot_random_portfolios,
                datasets=ld)
            ld["correlation_plot"] = lambda ld: cache_plot(
                secrets.token_urlsafe(32),
                lambda ld: plot_covariance(ld["m"], plot_correlation=True).
                figure,
                datasets=ld,
            )
            return ld
        except ValueError as ve:
            if warning_cb:
                warning_cb(
                    "Unable to optimise stocks with min_unique={} and var_min={}: n_stocks={} - {}"
                    .format(t[0], t[1], n_stocks, str(ve)))
            # try next iteration
            raise ve
    print("*** WARNING: unable to optimise portolio!")
    return LazyDictionary()
def show_financial_metrics(request, stock=None):
    """
    Render "stock_financial_metrics.html" for ``stock``.

    Three plots are requested via cache_plot(): earnings and revenue, metrics
    with a good linear fit, and metrics with a good degree-2 fit that are not
    already in the linear group. Raises Http404 (when the metrics dataframe is
    evaluated) if no financial metrics are available for the stock.
    """
    validate_user(request.user)
    validate_stock(stock)

    def data_factory(ld: LazyDictionary):
        metrics_df = financial_metrics(stock)
        if metrics_df is not None and len(metrics_df) >= 1:
            return metrics_df
        raise Http404(f"No financial metrics available for {stock}")

    def find_linear_metrics(ld: LazyDictionary) -> Iterable[str]:
        trends = calculate_trends(ld["data_df"])
        # keep the metrics whose second trend element is below 0.1
        return [metric for metric, fit in trends.items() if fit[1] < 0.1]

    def find_exp_metrics(ld: LazyDictionary) -> Iterable[str]:
        trends = calculate_trends(
            ld["data_df"], polynomial_degree=2, nrmse_cutoff=0.05
        )
        already_linear = set(ld["linear_metrics"])
        return [
            metric
            for metric, fit in trends.items()
            if fit[1] < 0.1 and metric not in already_linear
        ]

    ld = LazyDictionary()
    ld["data_df"] = lambda ld: data_factory(ld)
    ld["linear_metrics"] = lambda ld: find_linear_metrics(ld)
    ld["exp_metrics"] = lambda ld: find_exp_metrics(ld)

    def melt_by_metric(df: pd.DataFrame) -> pd.DataFrame:
        # long format: one row per (metric, column) pair without missing values
        df["metric"] = df.index
        return df.melt(id_vars="metric").dropna(how="any", axis=0)

    def plot_metrics(df: pd.DataFrame, use_short_labels=False, **kwargs):
        plot = (
            p9.ggplot(df, p9.aes(x="date", y="value", colour="metric"))
            + p9.geom_line(size=1.3)
            + p9.geom_point(size=3)
        )
        if use_short_labels:
            plot += p9.scale_y_continuous(labels=label_shorten)
        # the figure grows with the number of distinct metrics
        height = int(df["metric"].nunique() * 1.5)
        return user_theme(
            plot,
            subplots_adjust={"left": 0.2},
            figure_size=(12, height),
            **kwargs,
        )

    def plot_linear_trending_metrics(ld: LazyDictionary):
        selected = ld["data_df"].filter(ld["linear_metrics"], axis=0)
        if len(selected) < 1:
            return None
        plot = plot_metrics(melt_by_metric(selected), use_short_labels=True)
        plot += p9.facet_wrap("~metric", ncol=1, scales="free_y")
        return plot

    def plot_exponential_growth_metrics(ld: LazyDictionary):
        selected = ld["data_df"].filter(ld["exp_metrics"], axis=0)
        if len(selected) < 1:
            return None
        plot = plot_metrics(melt_by_metric(selected))
        plot += p9.facet_wrap("~metric", ncol=1, scales="free_y")
        return plot

    def plot_earnings_and_revenue(ld: LazyDictionary):
        selected = ld["data_df"].filter(
            ["Ebit", "Total Revenue", "Earnings"], axis=0
        )
        if len(selected) < 2:
            print(f"WARNING: revenue and earnings not availabe for {stock}")
            return None
        # need to show metric name somewhere on plot
        return plot_metrics(
            melt_by_metric(selected),
            use_short_labels=True,
            legend_position="right",
            y_axis_label="$ AUD",
        )

    er_uri = cache_plot(
        f"{stock}-earnings-revenue-plot",
        lambda ld: plot_earnings_and_revenue(ld),
        datasets=ld,
    )
    trending_metrics_uri = cache_plot(
        f"{stock}-trending-metrics-plot",
        lambda ld: plot_linear_trending_metrics(ld),
        datasets=ld,
    )
    exp_growth_metrics_uri = cache_plot(
        f"{stock}-exponential-growth-metrics-plot",
        lambda ld: plot_exponential_growth_metrics(ld),
        datasets=ld,
    )
    warning(
        request,
        "Due to experimental data ingest - data on this page may be wrong/misleading/inaccurate/missing. Use at own risk.",
    )
    context = {
        "asx_code": stock,
        "data": ld["data_df"],
        "earnings_and_revenue_plot_uri": er_uri,
        "trending_metrics_plot_uri": trending_metrics_uri,
        "exp_growth_metrics_plot_uri": exp_growth_metrics_uri,
    }
    return render(request, "stock_financial_metrics.html", context=context)
def market_sentiment(request, n_days=21, n_top_bottom=20, sector_n_days=365):
    """
    Render "market_sentiment_view.html": market-wide sentiment heatmap, sector
    performance, market cap distribution and monthly mean returns per sector.

    Parameters:
        request: the current request (user is validated).
        n_days: length of the sentiment timeframe; must be > 0.
        n_top_bottom: passed through to the template; must be > 0.
        sector_n_days: length of the timeframe used for the sector plots. The
            market cap dataset requires it to produce more than 90 dates.
    """
    validate_user(request.user)
    assert n_days > 0
    assert n_top_bottom > 0

    def market_cap_data_factory(ld: LazyDictionary) -> pd.DataFrame:
        dates = ld["sector_timeframe"].all_dates()
        assert len(dates) > 90
        frames = []
        adjusted_dates = []
        # quotes are sampled at the start and end of the timeframe and at the
        # 30th and 90th dates counted from the end
        for the_date in [dates[0], dates[-1], dates[-30], dates[-90]]:
            quotes, actual_trading_date = valid_quotes_only(
                the_date, ensure_date_has_data=True
            )
            frames.append(make_quote_df(quotes, ld["asx_codes"], actual_trading_date))
            if the_date != actual_trading_date:
                adjusted_dates.append(the_date)

        if len(adjusted_dates) > 0:
            warning(
                request,
                "Some dates were not trading days, adjusted: {}".format(
                    adjusted_dates
                ),
            )
        # DataFrame.append() no longer exists in pandas >= 2.0; concatenating
        # the collected frames yields the same rows in the same order
        return pd.concat(frames)

    ld = LazyDictionary()
    ld["asx_codes"] = lambda ld: all_stocks()
    ld["sector_timeframe"] = lambda ld: Timeframe(past_n_days=sector_n_days)
    ld["timeframe"] = lambda ld: Timeframe(past_n_days=n_days)
    ld["sector_df"] = lambda ld: cached_all_stocks_cip(ld["sector_timeframe"])
    ld["sector_cumsum_df"] = lambda ld: ld["sector_df"].cumsum(axis=1)
    ld["cip_df"] = lambda ld: ld["sector_df"].filter(
        items=ld["timeframe"].all_dates(), axis=1
    )
    ld["market_cap_df"] = lambda ld: market_cap_data_factory(ld)
    ld["stocks_by_sector"] = lambda ld: stocks_by_sector()

    sentiment_plot = cache_plot(
        f"market-sentiment-{ld['timeframe'].description}",
        lambda ld: plot_heatmap(ld["timeframe"], ld),
        datasets=ld,
    )
    sector_descr = ld["sector_timeframe"].description
    sector_performance_plot = cache_plot(
        f"sector-performance-{sector_descr}",
        lambda ld: plot_market_wide_sector_performance(ld),
        datasets=ld,
    )
    market_cap_dist_plot = cache_plot(
        f"market-cap-dist-{sector_descr}",
        lambda ld: plot_market_cap_distribution(ld),
        datasets=ld,
    )

    # monthly returns: difference between consecutive business-month-end
    # values of the cumulative sums (dates as rows, stocks as columns)
    df = ld["sector_cumsum_df"].transpose()
    df.index = pd.to_datetime(df.index, format="%Y-%m-%d")
    df = df.resample("BM").asfreq().diff(periods=1)
    ld["monthly_returns_by_stock"] = df

    context = {
        "sentiment_uri": sentiment_plot,
        "n_days": ld["timeframe"].n_days,
        "n_stocks_plotted": len(ld["asx_codes"]),
        "n_top_bottom": n_top_bottom,
        "watched": user_watchlist(request.user),
        "sector_performance_uri": sector_performance_plot,
        "sector_timeframe": ld["sector_timeframe"],
        "sector_performance_title": "Cumulative sector avg. performance: {}".format(
            ld["sector_timeframe"].description
        ),
        "title": "Market sentiment",
        "market_cap_distribution_uri": market_cap_dist_plot,
        "monthly_sector_mean_returns": plot_sector_monthly_mean_returns(ld),
    }
    return render(request, "market_sentiment_view.html", context=context)
def show_stock(request, stock=None, n_days=2 * 365):
    """
    Displays a view of a single stock via the template and associated state

    Datasets are registered as lazy entries in a LazyDictionary and handed to
    cache_plot() via datasets=ld, so a dataset is only built when something
    actually reads it.

    Args:
        request: the incoming request; request.user is checked with validate_user()
        stock: the stock code to display, checked with validate_stock()
        n_days: number of past days covered by the plots (default two years)

    Returns:
        The response produced by render() for "stock_page.html"
    """
    validate_stock(stock)
    validate_user(request.user)
    plot_timeframe = Timeframe(past_n_days=n_days)  # for template

    def dataframe(ld: LazyDictionary) -> pd.DataFrame:
        """Load the stock's price/fundamental fields with 200 extra days of history."""
        momentum_timeframe = Timeframe(
            past_n_days=n_days + 200
        )  # to warmup MA200 function
        return company_prices(
            (stock,),
            momentum_timeframe,
            fields=all_stock_fundamental_fields,
            missing_cb=None,
        )

    # key dynamic images and text for HTML response. We only compute the required data if image(s) not cached
    ld = LazyDictionary()
    # the plotted window is cut out of the longer warm-up dataframe
    ld["stock_df"] = lambda ld: ld["stock_df_200"].filter(
        items=plot_timeframe.all_dates(), axis="rows"
    )
    ld["cip_df"] = lambda: cached_all_stocks_cip(plot_timeframe)
    ld["stock_df_200"] = lambda ld: dataframe(ld)
    ld["sector_companies"] = lambda: companies_with_same_sector(stock)
    ld["company_details"] = lambda: stock_info(stock, lambda msg: warning(request, msg))
    ld["sector"] = lambda ld: ld["company_details"].get("sector_name", "")
    # point_score_results is a tuple (point_score_df, net_points_by_rule)
    ld["point_score_results"] = lambda ld: make_point_score_dataframe(
        stock, default_point_score_rules(), ld
    )
    ld["stock_vs_sector_df"] = lambda ld: make_stock_vs_sector_dataframe(
        ld["cip_df"], stock, ld["sector_companies"]
    )
    # NB: do not read ld["stock_vs_sector_df"] here (eg. to print it for
    # debugging): that forces cip_df and sector_companies to be built on every
    # request and defeats the lazy evaluation described above.

    momentum_plot = cache_plot(
        f"{plot_timeframe.description}-{stock}-rsi-plot",
        lambda ld: plot_momentum(stock, plot_timeframe, ld),
        datasets=ld,
    )
    monthly_maximum_plot = cache_plot(
        f"{plot_timeframe.description}-{stock}-monthly-maximum-plot",
        lambda ld: plot_trend("M", ld),
        datasets=ld,
    )
    monthly_returns_plot = cache_plot(
        f"{plot_timeframe.description}-{stock}-monthly returns",
        lambda ld: plot_monthly_returns(plot_timeframe, stock, ld),
        datasets=ld,
    )
    # the cache key includes the sector name, so this reads ld["sector"]
    # (and therefore ld["company_details"]) straight away
    company_versus_sector_plot = cache_plot(
        f"{stock}-{ld['sector']}-company-versus-sector",
        lambda ld: plot_company_versus_sector(
            ld["stock_vs_sector_df"], stock, ld["sector"]
        ),
        datasets=ld,
    )
    point_score_plot = cache_plot(
        f"{plot_timeframe.description}-{stock}-point-score-plot",
        lambda ld: plot_series(ld["point_score_results"][0], x="date", y="points"),
        datasets=ld,
    )
    net_rule_contributors_plot = cache_plot(
        f"{plot_timeframe.description}-{stock}-rules-by-points",
        lambda ld: plot_points_by_rule(ld["point_score_results"][1]),
        datasets=ld,
    )
    fundamentals_plot = cache_plot(
        f"{stock}-{plot_timeframe.description}-fundamentals-plot",
        lambda ld: plot_fundamentals(
            fundamentals_dataframe(plot_timeframe, stock, ld),
            stock,
        ),
        datasets=ld,
    )

    # populate template and render HTML page with context
    context = {
        "asx_code": stock,
        "watched": user_watchlist(request.user),
        "timeframe": plot_timeframe,
        "information": ld["company_details"],
        "momentum": {
            "rsi_plot": momentum_plot,
            "monthly_highest_price": {
                "title": "Highest price each month",
                "plot_uri": monthly_maximum_plot,
            },
        },
        "fundamentals": {
            "plot_uri": fundamentals_plot,
            "title": "Stock fundamentals: EPS, PE, DY etc.",
            "timeframe": plot_timeframe,
        },
        "stock_vs_sector": {
            "plot_uri": company_versus_sector_plot,
            "title": "Company versus sector - percentage change",
            "timeframe": plot_timeframe,
        },
        "point_score": {
            "plot_uri": point_score_plot,
            "title": "Points score due to price movements",
        },
        "net_contributors": {
            "plot_uri": net_rule_contributors_plot,
            "title": "Contributions to point score by rule",
        },
        "month_by_month_return_uri": monthly_returns_plot,
    }
    return render(request, "stock_page.html", context=context)