def plot_breakdown(cip_df: pd.DataFrame):
    """Stacked bar plot of increasing and decreasing stocks per sector in the specified df"""
    cols_to_drop = [colname for colname in cip_df.columns if colname.startswith('bin_')]
    df = cip_df.drop(columns=cols_to_drop)
    df = pd.DataFrame(df.sum(axis='columns'), columns=['sum'])
    df = df.merge(stocks_by_sector(), left_index=True, right_on='asx_code')

    if len(df) == 0:  # no stocks in cip_df have a sector? i.e. all ETFs?
        return None

    assert set(df.columns) == set(['sum', 'asx_code', 'sector_name'])
    df['increasing'] = df.apply(lambda row: 'up' if row['sum'] >= 0.0 else 'down', axis=1)
    sector_names = df['sector_name'].value_counts().index.tolist()  # order bars by sector frequency
    sector_names_cat = pd.Categorical(df['sector_name'], categories=sector_names)
    df = df.assign(sector_name_cat=sector_names_cat)
    #print(df)
    plot = (
        p9.ggplot(df, p9.aes(x='factor(sector_name_cat)', fill='factor(increasing)'))
        + p9.geom_bar()
        + p9.labs(x="Sector", y="Number of stocks")
        + p9.theme(axis_text_y=p9.element_text(size=7),
                   subplots_adjust={"left": 0.2, 'right': 0.85},
                   legend_title=p9.element_blank())
        + p9.coord_flip()
    )
    return plot_as_inline_html_data(plot)
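# A minimal, self-contained sketch of the Categorical ordering trick used by
# plot_breakdown() above: bar order follows sector frequency rather than
# alphabetical order. The data below is made up purely for illustration.
import pandas as pd

_demo = pd.DataFrame({"sector_name": ["Financials", "Energy", "Financials",
                                      "Materials", "Financials", "Energy"]})
_order = _demo["sector_name"].value_counts().index.tolist()  # most frequent first
_demo["sector_name_cat"] = pd.Categorical(_demo["sector_name"], categories=_order)
print(_demo["sector_name_cat"].cat.categories)  # Financials, Energy, Materials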
def rank_cumulative_change(df: pd.DataFrame, timeframe: Timeframe):
    cum_sum = defaultdict(float)
    #print(df)
    for date in filter(lambda k: k in df.columns, timeframe.all_dates()):
        for code, price_change in df[date].fillna(0.0).iteritems():
            cum_sum[code] += price_change
        rank = pd.Series(cum_sum).rank(method='first', ascending=False)
        df[date] = rank

    all_available_dates = df.columns
    avgs = df.mean(axis=1)  # NB: do this BEFORE adding columns...
    assert len(avgs) == len(df)
    df['x'] = all_available_dates[-1]
    df['y'] = df[all_available_dates[-1]]

    bins = ['top', 'bin2', 'bin3', 'bin4', 'bin5', 'bottom']
    average_rank_binned = pd.cut(avgs, len(bins), labels=bins)
    assert len(average_rank_binned) == len(df)
    df['bin'] = average_rank_binned
    df['asx_code'] = df.index
    stock_sector_df = stocks_by_sector()  # make one DB call (cached) rather than lots of round-trips
    #print(stock_sector_df)
    stock_sector_df = stock_sector_df.set_index('asx_code')
    #print(df.index)
    df['sector'] = [stock_sector_df.loc[code].sector_name for code in df.index]
    df = pd.melt(df,
                 id_vars=['asx_code', 'bin', 'sector', 'x', 'y'],
                 var_name='date',
                 value_name='rank',
                 value_vars=all_available_dates)
    df['date'] = pd.to_datetime(df['date'], format="%Y-%m-%d")
    df['x'] = pd.to_datetime(df['x'], format="%Y-%m-%d")
    return df
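# Self-contained sketch of the binning step above. Note that pd.cut() must be
# given the bin names via labels=; passed positionally, the list would be
# consumed as the `right` argument instead. Values are illustrative only.
import pandas as pd

_avgs = pd.Series([1.0, 3.5, 7.2, 12.9, 18.0, 24.5])
_bins = ["top", "bin2", "bin3", "bin4", "bin5", "bottom"]
print(pd.cut(_avgs, len(_bins), labels=_bins))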
def plot_market_wide_sector_performance(all_stocks_cip: pd.DataFrame):
    """
    Display specified dates for average sector performance. Each company
    is assumed to start at zero at the beginning of the observation period.
    A plot as base64 data is returned.
    """
    n_stocks = len(all_stocks_cip)
    # merge in sector information for each company
    code_and_sector = stocks_by_sector()
    n_unique_sectors = len(code_and_sector["sector_name"].unique())
    print("Found {} unique sectors".format(n_unique_sectors))
    #print(code_and_sector)
    df = all_stocks_cip.merge(code_and_sector, left_index=True, right_on="asx_code")
    print(
        "Found {} stocks, {} sectors and merged total: {}".format(
            n_stocks, len(code_and_sector), len(df)
        )
    )
    # compute average change in percent of each unique sector over each day and sum over the dates
    cumulative_pct_change = df.expanding(axis="columns").sum()
    # merge date-wise into df
    for date in cumulative_pct_change.columns:
        df[date] = cumulative_pct_change[date]
    # df.to_csv('/tmp/crap.csv')
    grouped_df = df.groupby("sector_name").mean()
    # grouped_df.to_csv('/tmp/crap.csv')

    # ready the dataframe for plotting
    grouped_df = pd.melt(
        grouped_df,
        ignore_index=False,
        var_name="date",
        value_name="cumulative_change_percent",
    )
    grouped_df["sector"] = grouped_df.index
    grouped_df["date"] = pd.to_datetime(grouped_df["date"])
    n_col = 3
    plot = (
        p9.ggplot(
            grouped_df, p9.aes("date", "cumulative_change_percent", color="sector")
        )
        + p9.geom_line(size=1.0)
        + p9.facet_wrap(
            "~sector", nrow=n_unique_sectors // n_col + 1, ncol=n_col, scales="free_y"
        )
        + p9.xlab("")
        + p9.ylab("Average sector change (%)")
        + p9.theme(
            axis_text_x=p9.element_text(angle=30, size=6),
            axis_text_y=p9.element_text(size=6),
            figure_size=(12, 6),
            panel_spacing=0.3,
            legend_position="none",
        )
    )
    return plot_as_inline_html_data(plot)
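# Sketch of the cumulative step above: expanding(axis="columns").sum() turns
# daily percentage changes into a running total per stock. Toy data only.
import pandas as pd

_daily = pd.DataFrame({"2021-01-04": [1.0, -0.5], "2021-01-05": [0.5, 2.0]},
                      index=["ANZ", "BHP"])
print(_daily.expanding(axis="columns").sum())
#      2021-01-04  2021-01-05
# ANZ         1.0         1.5
# BHP        -0.5         1.5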
def test_stocks_by_sector(
    comp_deets,
):  # pylint: disable=unused-argument,redefined-outer-name
    df = stocks_by_sector()
    assert df is not None
    assert isinstance(df, pd.DataFrame)
    assert len(df) == 1
    assert df.iloc[0].asx_code == "ANZ"
    assert df.iloc[0].sector_name == "Financials"
def get_dataset(dataset_wanted, request, timeframe=None):
    assert (
        dataset_wanted in set(["market_sentiment", "eps-per-sector"])
        or dataset_wanted.startswith("kmeans-")
        or dataset_wanted.startswith("financial-metrics-")
        or dataset_wanted.startswith("stock-quotes-")
    )

    if timeframe is None:
        timeframe = Timeframe(past_n_days=300)

    if dataset_wanted == "market_sentiment":
        df = cached_all_stocks_cip(timeframe)
        return df
    elif dataset_wanted == "kmeans-watchlist":
        _, _, _, _, df = make_kmeans_cluster_dataframe(
            timeframe, 7, user_watchlist(request.user)
        )
        return df
    elif dataset_wanted == "kmeans-etfs":
        _, _, _, _, df = make_kmeans_cluster_dataframe(timeframe, 7, all_etfs())
        return df
    elif dataset_wanted.startswith("stock-quotes-"):
        stock = dataset_wanted[len("stock-quotes-"):]
        validate_stock(stock)
        df = company_prices(
            [stock],
            timeframe=timeframe,
            fields=all_stock_fundamental_fields,
            missing_cb=None,
        )
        df['stock_code'] = stock
        return df
    elif dataset_wanted.startswith("kmeans-sector-"):
        sector_id = int(dataset_wanted[len("kmeans-sector-"):])
        sector = Sector.objects.get(sector_id=sector_id)
        if sector is None or sector.sector_name is None:
            raise Http404("No stocks associated with sector")
        asx_codes = all_sector_stocks(sector.sector_name)
        _, _, _, _, df = make_kmeans_cluster_dataframe(timeframe, 7, asx_codes)
        return df
    elif dataset_wanted.startswith("financial-metrics-"):
        stock = dataset_wanted[len("financial-metrics-"):]
        validate_stock(stock)
        df = financial_metrics(stock)
        if df is not None:
            # Excel doesn't support timezones, so we remove them first
            colnames = [d.strftime("%Y-%m-%d") for d in df.columns]
            df.columns = colnames
            # FALLTHRU
        return df
    elif dataset_wanted == "eps-per-sector":
        df, _ = pe_trends_df(Timeframe(past_n_days=180))
        df = make_pe_trends_eps_df(df, stocks_by_sector())
        df = df.set_index("asx_code", drop=True)
        return df
    else:
        raise ValueError("Unsupported dataset {}".format(dataset_wanted))
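# Hedged sketch of the prefix routing used by get_dataset() above: the dataset
# name encodes both the handler and its argument. parse_dataset_name() is a
# hypothetical helper shown purely for illustration.
def parse_dataset_name(name: str):
    for prefix in ("stock-quotes-", "financial-metrics-", "kmeans-sector-"):
        if name.startswith(prefix):
            return prefix[:-1], name[len(prefix):]
    return name, None

print(parse_dataset_name("stock-quotes-ANZ"))       # ('stock-quotes', 'ANZ')
print(parse_dataset_name("financial-metrics-BHP"))  # ('financial-metrics', 'BHP')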
def plot_market_wide_sector_performance(all_dates, field_name='change_in_percent'):
    """
    Display specified dates for average sector performance. Each company
    is assumed to start at zero at the beginning of the observation period.
    A plot as base64 data is returned.
    """
    df = company_prices(None, all_dates=all_dates, fields=field_name)  # None == all stocks
    n_stocks = len(df)
    # merge in sector information for each company
    code_and_sector = stocks_by_sector()
    n_unique_sectors = len(code_and_sector['sector_name'].unique())
    print("Found {} unique sectors".format(n_unique_sectors))
    #print(code_and_sector)
    df = df.merge(code_and_sector, left_on='asx_code', right_on='asx_code')
    print("Found {} stocks, {} sectors and merged total: {}".format(
        n_stocks, len(code_and_sector), len(df)))
    # compute average change in percent of each unique sector over each day and sum over the dates
    cumulative_pct_change = df.expanding(axis='columns').sum()
    # merge date-wise into df
    for date in cumulative_pct_change.columns:
        df[date] = cumulative_pct_change[date]
    #df.to_csv('/tmp/crap.csv')
    grouped_df = df.groupby('sector_name').mean()
    #grouped_df.to_csv('/tmp/crap.csv')

    # ready the dataframe for plotting
    grouped_df = pd.melt(grouped_df,
                         ignore_index=False,
                         var_name='date',
                         value_name='cumulative_change_percent')
    grouped_df['sector'] = grouped_df.index
    grouped_df['date'] = pd.to_datetime(grouped_df['date'])
    n_col = 3
    plot = (p9.ggplot(
        grouped_df, p9.aes('date', 'cumulative_change_percent', color='sector'))
            + p9.geom_line(size=1.0)
            + p9.facet_wrap('~sector',
                            nrow=n_unique_sectors // n_col + 1,
                            ncol=n_col,
                            scales='free_y')
            + p9.xlab('')
            + p9.ylab('Average sector change (%)')
            + p9.theme(axis_text_x=p9.element_text(angle=30, size=6),
                       axis_text_y=p9.element_text(size=6),
                       figure_size=(12, 6),
                       panel_spacing=0.3,
                       legend_position='none'))
    return plot_as_inline_html_data(plot)
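# Sketch of the melt step above: ignore_index=False (pandas >= 1.1) preserves
# the sector index so it can be copied into a plain column afterwards. Toy data only.
import pandas as pd

_g = pd.DataFrame({"2021-01-04": [1.5, -0.2], "2021-01-05": [2.0, 0.3]},
                  index=pd.Index(["Financials", "Energy"], name="sector_name"))
_long = pd.melt(_g, ignore_index=False, var_name="date",
                value_name="cumulative_change_percent")
_long["sector"] = _long.index
print(_long)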
def data_factory(df: pd.DataFrame) -> pd.DataFrame:
    df = df.pivot(
        index=["asx_code", "fetch_date"], columns="field_name", values="field_value"
    )
    required = (df.number_of_shares > 0) & (df.eps > 0.0)
    df = df[required]  # ignore stocks which have unknowns
    # print(df)
    df["total_earnings"] = df["eps"] * df["number_of_shares"]
    df = df.dropna(how="any", axis=0)
    df = df.reset_index()
    df = df.pivot(index="asx_code", columns="fetch_date", values="total_earnings")
    df = df.merge(stocks_by_sector(), left_index=True, right_on="asx_code")
    df = df.set_index("asx_code", drop=True)
    df = df.groupby("sector_name").sum()
    df["sector_name"] = df.index
    df = df.melt(id_vars="sector_name", var_name="fetch_date")
    assert set(df.columns) == set(["sector_name", "fetch_date", "value"])
    df["fetch_date"] = pd.to_datetime(df["fetch_date"], format="%Y-%m-%d")
    return df
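# Self-contained sketch of the first pivot in data_factory() above: long-format
# rows of (code, date, field, value) become one column per field. Toy data only.
import pandas as pd

_long = pd.DataFrame({
    "asx_code": ["ANZ"] * 4,
    "fetch_date": ["2021-01-04", "2021-01-04", "2021-01-05", "2021-01-05"],
    "field_name": ["eps", "number_of_shares"] * 2,
    "field_value": [1.5, 2.8e9, 1.6, 2.8e9],
})
_wide = _long.pivot(index=["asx_code", "fetch_date"], columns="field_name",
                    values="field_value")
print(_wide)  # one column each for eps and number_of_shares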
def rank_cumulative_change(df: pd.DataFrame, timeframe: Timeframe):
    cum_sum = defaultdict(float)
    # print(df)
    for date in filter(lambda k: k in df.columns, timeframe.all_dates()):
        for code, price_change in df[date].fillna(0.0).iteritems():
            cum_sum[code] += price_change
        rank = pd.Series(cum_sum).rank(method="first", ascending=False)
        df[date] = rank

    all_available_dates = df.columns
    avgs = df.mean(axis=1)  # NB: do this BEFORE adding columns...
    assert len(avgs) == len(df)
    df["x"] = all_available_dates[-1]
    df["y"] = df[all_available_dates[-1]]

    bins = ["top", "bin2", "bin3", "bin4", "bin5", "bottom"]
    average_rank_binned = pd.cut(avgs, len(bins), labels=bins)
    assert len(average_rank_binned) == len(df)
    df["bin"] = average_rank_binned
    df["asx_code"] = df.index
    stock_sector_df = (
        stocks_by_sector()
    )  # make one DB call (cached) rather than lots of round-trips
    # print(stock_sector_df)
    stock_sector_df = stock_sector_df.set_index("asx_code")
    # print(df.index)
    df = df.merge(
        stock_sector_df, left_index=True, right_on="asx_code"
    )  # NB: this merge will lose rows: those that don't have a sector e.g. ETFs
    df = pd.melt(
        df,
        id_vars=["asx_code", "bin", "sector_name", "x", "y"],
        var_name="date",
        value_name="rank",
        value_vars=all_available_dates,
    )
    df["date"] = pd.to_datetime(df["date"], format="%Y-%m-%d")
    df["x"] = pd.to_datetime(df["x"], format="%Y-%m-%d")
    return df
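# Sketch of why the merge above loses rows: the default inner join keeps only
# codes present in both frames, so ETFs without a sector disappear. Toy data only.
import pandas as pd

_ranks = pd.DataFrame({"2021-01-04": [1, 2]}, index=["ANZ", "VAS"])  # VAS: an ETF
_sectors = pd.DataFrame({"asx_code": ["ANZ"], "sector_name": ["Financials"]})
print(_ranks.merge(_sectors, left_index=True, right_on="asx_code"))  # only ANZ survives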
def show_pe_trends(request):
    """
    Display a plot of per-sector PE trends across stocks in each sector
    ref: https://www.commsec.com.au/education/learn/choosing-investments/what-is-price-to-earnings-pe-ratio.html
    """
    validate_user(request.user)
    timeframe = Timeframe(past_n_days=180)
    pe_df = company_prices(None, timeframe, fields="pe", missing_cb=None, transpose=True)
    eps_df = company_prices(None, timeframe, fields="eps", missing_cb=None, transpose=True)
    ss = stocks_by_sector()
    ss_dict = {row.asx_code: row.sector_name for row in ss.itertuples()}
    #print(ss_dict)
    eps_stocks = set(eps_df.index)
    n_stocks = len(pe_df)
    positive_pe_stocks = set(pe_df[pe_df.sum(axis=1) > 0.0].index)
    all_stocks = set(pe_df.index)
    n_non_zero_sum = len(positive_pe_stocks)
    #print(exclude_zero_sum)
    records = []
    trading_dates = set(pe_df.columns)

    sector_counts_all_stocks = ss['sector_name'].value_counts()
    all_sectors = set(ss['sector_name'].unique())
    pe_pos_df = pe_df.filter(items=positive_pe_stocks, axis=0).merge(
        ss, left_index=True, right_on='asx_code')
    assert len(pe_pos_df) <= len(positive_pe_stocks) and len(pe_pos_df) > 0
    market_avg_pe_df = pe_pos_df.mean(axis=0).to_frame(name='market_pe')  # avg P/E by date series
    market_avg_pe_df['date'] = pd.to_datetime(market_avg_pe_df.index)
    #print(market_avg_pe_df)
    breakdown_by_sector_pe_pos_stocks_only = pe_pos_df['sector_name'].value_counts()
    #print(breakdown_by_sector_pe_pos_stocks_only)
    sector_counts_pe_pos_stocks_only = {
        s[0]: s[1]
        for s in breakdown_by_sector_pe_pos_stocks_only.items()
    }
    #print(sector_counts_pe_pos_stocks_only)
    #print(sector_counts_all_stocks)
    for ymd in filter(lambda d: d in trading_dates, timeframe.all_dates()):
        # needed to avoid KeyError raised during DataFrame.at[] calls below
        sum_pe_per_sector = defaultdict(float)
        sum_eps_per_sector = defaultdict(float)
        for stock in filter(lambda code: code in ss_dict, all_stocks):
            sector = ss_dict[stock]
            assert isinstance(sector, str)

            if stock in eps_stocks:
                eps = eps_df.at[stock, ymd]
                if isnan(eps):
                    continue
                sum_eps_per_sector[sector] += eps

            if stock in positive_pe_stocks:
                pe = pe_df.at[stock, ymd]
                if isnan(pe):
                    continue
                assert pe >= 0.0
                sum_pe_per_sector[sector] += pe

        #print(sum_pe_per_sector)
        assert len(sector_counts_pe_pos_stocks_only) == len(sum_pe_per_sector)
        assert len(sector_counts_all_stocks) == len(sum_eps_per_sector)
        for sector in all_sectors:
            pe_sum = sum_pe_per_sector.get(sector, None)
            n_pe = sector_counts_pe_pos_stocks_only.get(sector, None)
            pe_mean = pe_sum / n_pe if pe_sum is not None else None
            eps_sum = sum_eps_per_sector.get(sector, None)
            records.append({
                'date': ymd,
                'sector': sector,
                'mean_pe': pe_mean,
                'sum_pe': pe_sum,
                'sum_eps': eps_sum,
                'n_stocks': n_stocks,
                'n_sector_stocks_pe_only': n_pe
            })
    df = pd.DataFrame.from_records(records)
    #print(df[df["sector"] == 'Utilities'])
    #print(df)
    context = {
        "title": "PE Trends: {}".format(timeframe.description),
        "n_stocks": n_stocks,
        "timeframe": timeframe,
        "n_stocks_with_pe": n_non_zero_sum,
        "sector_pe_plot": plot_sector_field(df, field="mean_pe"),
        "sector_eps_plot": plot_sector_field(df, field="sum_eps"),
        "market_pe_plot": plot_series(market_avg_pe_df,
                                      x='date',
                                      y='market_pe',
                                      y_axis_label="Market-wide mean P/E",
                                      color=None,
                                      use_smooth_line=True)
    }
    return render(request, "pe_trends.html", context)
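# Sketch of the guard used above: restricting the date loop to columns that
# actually exist avoids the KeyError that DataFrame.at[] raises for missing
# dates (e.g. weekends). Toy data only.
import pandas as pd

_pe = pd.DataFrame({"2021-01-04": [10.0]}, index=["ANZ"])
_wanted = ["2021-01-03", "2021-01-04"]  # 2021-01-03 was not a trading day
_trading = set(_pe.columns)
for _ymd in filter(lambda d: d in _trading, _wanted):
    print(_ymd, _pe.at["ANZ", _ymd])  # only 2021-01-04 is visited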
def show_companies(
        matching_companies,  # may be QuerySet or iterable of stock codes (str)
        request,
        sentiment_timeframe: Timeframe,
        extra_context=None,
        template_name="all_stocks.html",
):
    """
    Support function to public-facing views to eliminate code redundancy
    """
    if isinstance(matching_companies, QuerySet):
        stocks_queryset = matching_companies  # we assume QuerySet is already sorted by desired criteria
    elif matching_companies is None or len(matching_companies) > 0:
        stocks_queryset, _ = latest_quote(matching_companies)
        # FALLTHRU
    else:  # no companies to report?
        warning(request, "No matching companies.")
        return render(request, template_name, context={"timeframe": sentiment_timeframe})

    # prune companies without a latest price, makes no sense to report them
    stocks_queryset = stocks_queryset.exclude(last_price__isnull=True)

    # sort queryset as this will often be requested by the user
    arg = request.GET.get("sort_by", "asx_code")
    #info(request, "Sorting by {}".format(arg))

    if arg == "sector" or arg == "sector,-eps":
        ss = {
            s["asx_code"]: s["sector_name"]
            for s in stocks_by_sector().to_dict("records")
        }
        if arg == "sector":
            stocks_queryset = sorted(
                stocks_queryset, key=lambda s: ss.get(s.asx_code, "Z")
            )  # companies without a sector sort last
        else:
            eps_dict = {
                s.asx_code: s.eps if s.eps is not None else 0.0
                for s in stocks_queryset
            }
            stocks_queryset = sorted(
                stocks_queryset,
                key=lambda s: (ss.get(s.asx_code, "Z"), -eps_dict.get(s.asx_code, 0.0)),
            )
    else:
        sort_by = tuple(arg.split(","))
        stocks_queryset = stocks_queryset.order_by(*sort_by)

    # keep track of stock codes for template convenience
    asx_codes = [quote.asx_code for quote in stocks_queryset]
    n_top_bottom = (
        extra_context["n_top_bottom"] if "n_top_bottom" in extra_context else 20
    )
    print("show_companies: found {} stocks".format(len(asx_codes)))

    # setup context dict for the render
    context = {
        # NB: title and heatmap_title are expected to be supplied by caller via extra_context
        "timeframe": sentiment_timeframe,
        "title": "Caller must override",
        "watched": user_watchlist(request.user),
        "n_stocks": len(asx_codes),
        "n_top_bottom": n_top_bottom,
        "virtual_purchases": user_purchases(request.user),
    }

    # since we sort above, we must setup the pagination also...
    # assert isinstance(stocks_queryset, QuerySet)
    paginator = Paginator(stocks_queryset, 50)
    page_number = request.GET.get("page", 1)
    page_obj = paginator.page(page_number)
    context["page_obj"] = page_obj
    context["object_list"] = paginator

    # compute totals across all dates for the specified companies to look at top10/bottom10 in the timeframe
    ld = LazyDictionary()
    ld["cip_df"] = lambda ld: selected_cached_stocks_cip(asx_codes, sentiment_timeframe)
    ld["sum_by_company"] = lambda ld: ld["cip_df"].sum(axis=1, numeric_only=True)
    ld["top10"] = lambda ld: ld["sum_by_company"].nlargest(n_top_bottom)
    ld["bottom10"] = lambda ld: ld["sum_by_company"].nsmallest(n_top_bottom)
    ld["stocks_by_sector"] = lambda ld: stocks_by_sector()

    if len(asx_codes) <= 0 or len(ld["top10"]) <= 0:
        warning(request, "No matching companies found.")
    else:
        sorted_codes = "-".join(sorted(asx_codes))
        sentiment_heatmap_uri = cache_plot(
            f"{sorted_codes}-{sentiment_timeframe.description}-stocks-sentiment-plot",
            lambda ld: plot_heatmap(sentiment_timeframe, ld),
            datasets=ld,
        )
        key = f"{sorted_codes}-{sentiment_timeframe.description}-breakdown-plot"
        sector_breakdown_uri = cache_plot(key, plot_breakdown, datasets=ld)
        top10_plot_uri = cache_plot(
            f"top10-plot-{'-'.join(ld['top10'].index)}",
            lambda ld: plot_cumulative_returns(ld["top10"].index, ld),
            datasets=ld,
        )
        bottom10_plot_uri = cache_plot(
            f"bottom10-plot-{'-'.join(ld['bottom10'].index)}",
            lambda ld: plot_cumulative_returns(ld["bottom10"].index, ld),
            datasets=ld,
        )
        context.update({
            "best_ten": ld["top10"],
            "worst_ten": ld["bottom10"],
            "sentiment_heatmap_uri": sentiment_heatmap_uri,
            "sentiment_heatmap_title": "{}: {}".format(
                context["title"], sentiment_timeframe.description),
            "sector_breakdown_uri": sector_breakdown_uri,
            "top10_plot_uri": top10_plot_uri,
            "bottom10_plot_uri": bottom10_plot_uri,
            "timeframe_end_performance": timeframe_end_performance(ld),
        })

    if extra_context:
        context.update(extra_context)
    add_messages(request, context)
    # print(context)
    return render(request, template_name, context=context)
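# Hedged sketch of the LazyDictionary pattern used above. This stand-in is NOT
# the project's implementation: it just shows the idea that values stored as
# callables are evaluated on first access and the result cached thereafter,
# so plots that are never rendered never trigger their expensive queries.
class _LazyDict(dict):
    def __getitem__(self, key):
        value = super().__getitem__(key)
        if callable(value):
            value = value(self)              # compute on first access...
            super().__setitem__(key, value)  # ...and cache the result
        return value

_ld = _LazyDict()
_ld["expensive"] = lambda ld: 42             # not computed yet
print(_ld["expensive"], _ld["expensive"])    # computed once, then cached: 42 42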
def detect_outliers(stocks: list, all_stocks_cip: pd.DataFrame, rules=None):
    """
    Returns a list of the stocks from the specified list that are outliers according to the
    provided rules. all_stocks_cip is the "change in percent" dataframe covering at least
    the stocks present in the specified list.
    """
    if rules is None:
        rules = default_point_score_rules()
    str_rules = {str(r): r for r in rules}
    rows = []
    stocks_by_sector_df = stocks_by_sector()  # NB: ETFs in watchlist will have no sector
    stocks_by_sector_df.index = stocks_by_sector_df["asx_code"]
    for stock in stocks:
        # print("Processing stock: ", stock)
        try:
            sector = stocks_by_sector_df.at[stock, "sector_name"]
            sector_companies = list(
                stocks_by_sector_df.loc[
                    stocks_by_sector_df["sector_name"] == sector
                ].asx_code
            )
            # day_low_high() may raise KeyError when data is currently being fetched, so it appears here...
            day_low_high_df = day_low_high(stock, all_stocks_cip.columns)
        except KeyError:
            warning(
                None,
                "Unable to locate watchlist entry: {} - continuing without it".format(stock),
            )
            continue
        state = {
            "day_low_high_df": day_low_high_df,  # never changes each day, so we init it here
            "all_stocks_change_in_percent_df": all_stocks_cip,
            "stock": stock,
            "daily_range_threshold": 0.20,  # 20% at either end of the daily range gets a point
        }
        points_by_rule = defaultdict(int)
        for date in all_stocks_cip.columns:
            market_avg = all_stocks_cip[date].mean()
            sector_avg = all_stocks_cip[date].filter(items=sector_companies).mean()
            stock_move = all_stocks_cip.at[stock, date]
            state.update({
                "market_avg": market_avg,
                "sector_avg": sector_avg,
                "stock_move": stock_move,
                "date": date,
            })
            for rule_name, rule in str_rules.items():
                try:
                    points_by_rule[rule_name] += rule(state)
                except TypeError:  # handle nan's in dataset safely
                    pass
        d = {"stock": stock}
        d.update(points_by_rule)
        rows.append(d)
    df = pd.DataFrame.from_records(rows)
    df = df.set_index("stock")
    # print(df)
    clf = IForest()
    clf.fit(df)
    scores = clf.predict(df)
    results = [row[0] for row, value in zip(df.iterrows(), scores) if value > 0]
    # print(results)
    print("Found {} outlier stocks".format(len(results)))
    return results
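# Minimal sketch of the outlier-detection step above, assuming pyod is
# installed: IForest.predict() returns 1 for outliers and 0 for inliers.
# The feature values are toy rule scores, for illustration only.
import pandas as pd
from pyod.models.iforest import IForest

_features = pd.DataFrame({"rule_a": [1, 2, 1, 30], "rule_b": [0, 1, 0, 25]},
                         index=["ANZ", "BHP", "CBA", "XYZ"])
_clf = IForest()
_clf.fit(_features)
_labels = _clf.predict(_features)  # binary labels, 1 == outlier
print([code for code, flag in zip(_features.index, _labels) if flag > 0])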
def market_sentiment(request, n_days=21, n_top_bottom=20, sector_n_days=365):
    validate_user(request.user)
    assert n_days > 0
    assert n_top_bottom > 0

    def market_cap_data_factory(ld: LazyDictionary) -> pd.DataFrame:
        dates = ld["sector_timeframe"].all_dates()
        # print(dates)
        assert len(dates) > 90
        result_df = None
        adjusted_dates = []
        for the_date in [dates[0], dates[-1], dates[-30], dates[-90]]:
            print(f"Before valid_quotes_only for {the_date}")
            quotes, actual_trading_date = valid_quotes_only(
                the_date, ensure_date_has_data=True
            )
            print(f"After valid_quotes_only for {the_date}")
            print(f"Before make quotes {actual_trading_date}")
            print(len(quotes))
            df = make_quote_df(quotes, ld["asx_codes"], actual_trading_date)
            print("After make_quote_df")
            result_df = df if result_df is None else result_df.append(df)
            if the_date != actual_trading_date:
                adjusted_dates.append(the_date)

        if len(adjusted_dates) > 0:
            warning(
                request,
                "Some dates were not trading days, adjusted: {}".format(adjusted_dates),
            )
        return result_df

    ld = LazyDictionary()
    ld["asx_codes"] = lambda ld: all_stocks()
    ld["sector_timeframe"] = lambda ld: Timeframe(past_n_days=sector_n_days)
    ld["timeframe"] = lambda ld: Timeframe(past_n_days=n_days)
    ld["sector_df"] = lambda ld: cached_all_stocks_cip(ld["sector_timeframe"])
    ld["sector_cumsum_df"] = lambda ld: ld["sector_df"].cumsum(axis=1)
    ld["cip_df"] = lambda ld: ld["sector_df"].filter(
        items=ld["timeframe"].all_dates(), axis=1
    )
    ld["market_cap_df"] = lambda ld: market_cap_data_factory(ld)
    ld["stocks_by_sector"] = lambda ld: stocks_by_sector()

    sentiment_plot = cache_plot(
        f"market-sentiment-{ld['timeframe'].description}",
        lambda ld: plot_heatmap(ld["timeframe"], ld),
        datasets=ld,
    )
    sector_descr = ld["sector_timeframe"].description
    sector_performance_plot = cache_plot(
        f"sector-performance-{sector_descr}",
        lambda ld: plot_market_wide_sector_performance(ld),
        datasets=ld,
    )
    market_cap_dist_plot = cache_plot(
        f"market-cap-dist-{sector_descr}",
        lambda ld: plot_market_cap_distribution(ld),
        datasets=ld,
    )

    df = ld["sector_cumsum_df"].transpose()
    df.index = pd.to_datetime(df.index, format="%Y-%m-%d")
    df = df.resample("BM").asfreq().diff(periods=1)
    ld["monthly_returns_by_stock"] = df
    # print(df)

    context = {
        "sentiment_uri": sentiment_plot,
        "n_days": ld["timeframe"].n_days,
        "n_stocks_plotted": len(ld["asx_codes"]),
        "n_top_bottom": n_top_bottom,
        "watched": user_watchlist(request.user),
        "sector_performance_uri": sector_performance_plot,
        "sector_timeframe": ld["sector_timeframe"],
        "sector_performance_title": "Cumulative sector avg. performance: {}".format(
            ld["sector_timeframe"].description
        ),
        "title": "Market sentiment",
        "market_cap_distribution_uri": market_cap_dist_plot,
        "monthly_sector_mean_returns": plot_sector_monthly_mean_returns(ld),
    }
    return render(request, "market_sentiment_view.html", context=context)
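# Sketch of the monthly-returns step above: business-month-end ("BM") resampling
# plus a one-period diff converts a cumulative sum into month-over-month changes.
# Toy data only.
import pandas as pd

_idx = pd.date_range("2021-01-01", periods=90, freq="D")
_cumulative = pd.DataFrame({"ANZ": range(90)}, index=_idx)
print(_cumulative.resample("BM").asfreq().diff(periods=1))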
def show_pe_trends(request):
    """
    Display a plot of per-sector PE trends across stocks in each sector
    ref: https://www.commsec.com.au/education/learn/choosing-investments/what-is-price-to-earnings-pe-ratio.html
    """
    validate_user(request.user)

    def make_pe_trends_market_avg_df(ld: LazyDictionary) -> pd.DataFrame:
        df = ld["data_df"]
        ss = ld["stocks_by_sector"]
        pe_pos_df, _ = make_pe_trends_positive_pe_df(df, ss)
        market_avg_pe_df = pe_pos_df.mean(axis=0, numeric_only=True).to_frame(
            name="market_pe"
        )  # avg P/E by date series
        market_avg_pe_df["date"] = pd.to_datetime(market_avg_pe_df.index)
        return market_avg_pe_df

    def sector_eps_data_factory(ld: LazyDictionary) -> pd.DataFrame:
        df = ld["data_df"]
        n_stocks = df["asx_code"].nunique()
        pe_df, positive_pe_stocks = ld["positive_pe_tuple"]
        eps_df = ld["eps_df"]
        ss = ld["stocks_by_sector"]
        # print(positive_pe_stocks)
        eps_stocks = set(eps_df.index)
        ss_dict = {row.asx_code: row.sector_name for row in ss.itertuples()}
        # print(ss_dict)

        trading_dates = set(pe_df.columns)
        trading_dates.remove("sector_name")
        sector_counts_all_stocks = ss["sector_name"].value_counts()
        all_sectors = set(ss["sector_name"].unique())
        breakdown_by_sector_pe_pos_stocks_only = pe_df["sector_name"].value_counts()
        # print(breakdown_by_sector_pe_pos_stocks_only)
        sector_counts_pe_pos_stocks_only = {
            s[0]: s[1]
            for s in breakdown_by_sector_pe_pos_stocks_only.items()
        }
        # print(sector_counts_pe_pos_stocks_only)
        # print(sector_counts_all_stocks)
        records = []
        for ymd in filter(
            lambda d: d in trading_dates, ld["timeframe"].all_dates()
        ):  # needed to avoid KeyError raised during DataFrame.at[] calls below
            sum_pe_per_sector = defaultdict(float)
            sum_eps_per_sector = defaultdict(float)
            for stock in filter(lambda code: code in ss_dict, positive_pe_stocks):
                sector = ss_dict[stock]
                assert isinstance(sector, str)

                if stock in eps_stocks:
                    eps = eps_df.at[stock, ymd]
                    if isnan(eps):
                        continue
                    sum_eps_per_sector[sector] += eps

                if stock in positive_pe_stocks:
                    pe = pe_df.at[stock, ymd]
                    if isnan(pe):
                        continue
                    assert pe >= 0.0
                    sum_pe_per_sector[sector] += pe

            # print(len(sector_counts_all_stocks))
            # print(len(sum_eps_per_sector))
            assert len(sector_counts_pe_pos_stocks_only) >= len(sum_pe_per_sector)
            assert len(sector_counts_all_stocks) >= len(sum_eps_per_sector)
            for sector in all_sectors:
                pe_sum = sum_pe_per_sector.get(sector, None)
                n_pe = sector_counts_pe_pos_stocks_only.get(sector, None)
                pe_mean = pe_sum / n_pe if pe_sum is not None else None
                eps_sum = sum_eps_per_sector.get(sector, None)
                records.append({
                    "date": ymd,
                    "sector": sector,
                    "mean_pe": pe_mean,
                    "sum_pe": pe_sum,
                    "sum_eps": eps_sum,
                    "n_stocks": n_stocks,
                    "n_sector_stocks_pe_only": n_pe,
                })
        df = pd.DataFrame.from_records(records)
        # print(df[df["sector"] == 'Utilities'])
        # print(df)
        return df

    ld = LazyDictionary()
    ld["data_df"] = lambda ld: pe_trends_df(ld["timeframe"])
    ld["positive_pe_tuple"] = lambda ld: make_pe_trends_positive_pe_df(
        ld["data_df"], ld["stocks_by_sector"]
    )
    ld["market_avg_pe_df"] = lambda ld: make_pe_trends_market_avg_df(ld)
    ld["eps_df"] = lambda ld: make_pe_trends_eps_df(ld["data_df"])
    ld["sector_eps_df"] = lambda ld: sector_eps_data_factory(ld)
    ld["stocks_by_sector"] = stocks_by_sector()
    ld["timeframe"] = Timeframe(past_n_days=180)

    td = ld["timeframe"].description
    # these aren't per-user plots: they can safely be shared across all users of the site, so the key reflects that
    sector_pe_cache_key = f"{td}-by-sector-pe-plot"
    sector_eps_cache_key = f"{td}-by-sector-eps-plot"
    market_pe_cache_key = f"{td}-market-pe-mean"
    market_pe_plot_uri = cache_plot(
        market_pe_cache_key,
        lambda ld: plot_series(
            ld["market_avg_pe_df"],
            x="date",
            y="market_pe",
            y_axis_label="Market-wide mean P/E",
            color=None,
            use_smooth_line=True,
        ),
        datasets=ld,
    )

    context = {
        "title": "PE Trends",
        "n_stocks": ld["data_df"]["asx_code"].nunique(),
        "timeframe": ld["timeframe"],
        "n_stocks_with_pe": len(ld["positive_pe_tuple"][1]),
        "sector_pe_plot_uri": cache_plot(
            sector_pe_cache_key,
            lambda ld: plot_sector_field(ld["sector_eps_df"], field="mean_pe"),
            datasets=ld,
        ),
        "sector_eps_plot_uri": cache_plot(
            sector_eps_cache_key,
            lambda ld: plot_sector_field(ld["sector_eps_df"], field="sum_eps"),
            datasets=ld,
        ),
        "market_pe_plot_uri": market_pe_plot_uri,
        "sector_positive_top_contributors_eps_uri": cache_plot(
            f"top-contributors-{sector_eps_cache_key}",
            lambda ld: plot_sector_top_eps_contributors(
                ld["eps_df"], ld["stocks_by_sector"]
            ),
            datasets=ld,
        ),
    }
    return render(request, "pe_trends.html", context)
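# Sketch of the market-average step in make_pe_trends_market_avg_df() above:
# mean(axis=0, numeric_only=True) collapses per-stock P/E into a per-date
# average, silently skipping the non-numeric sector_name column. Toy data only.
import pandas as pd

_pe = pd.DataFrame({"2021-01-04": [10.0, 20.0],
                    "sector_name": ["Financials", "Energy"]},
                   index=["ANZ", "WPL"])
_market = _pe.mean(axis=0, numeric_only=True).to_frame(name="market_pe")
_market["date"] = pd.to_datetime(_market.index)
print(_market)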