def get_queryset(self, **kwargs):
    """
    Build the queryset of quotations for the stocks in the requested sector.

    The keyword arguments are the submitted form parameters; the keys read
    here are "sector", "report_top_n" and "report_bottom_n". When no
    parameters are supplied an empty queryset is returned. Otherwise the
    quotations (as at the latest quotation date for ANZ) are stored on
    ``self.qs`` and returned, and a warning is raised on the request if
    some of the wanted stocks have no quotation.
    """
    # user never run this view before?
    if not kwargs:
        print("WARNING: no form parameters specified - returning empty queryset")
        return Quotation.objects.none()

    self.sector = kwargs.get("sector", self.sector)
    sector_record = Sector.objects.get(sector_name=self.sector)
    self.sector_id = int(sector_record.sector_id)
    wanted_stocks = all_sector_stocks(self.sector)
    print("Found {} stocks matching sector={}".format(len(wanted_stocks), self.sector))
    mrd = latest_quotation_date('ANZ')

    top_n = kwargs.get('report_top_n', None)
    bottom_n = kwargs.get('report_bottom_n', None)
    if not (top_n is None and bottom_n is None):
        # rank the sector by summed change over the past 90 days
        cip_df = selected_cached_stocks_cip(wanted_stocks, Timeframe(past_n_days=90))
        cip_sum = cip_df.transpose().sum().to_frame(name="percent_cip")
        best = set()
        if top_n is not None:
            best = set(cip_sum.nlargest(top_n, "percent_cip").index)
        worst = set()
        if bottom_n is not None:
            worst = set(cip_sum.nsmallest(bottom_n, "percent_cip").index)
        wanted_stocks = best.union(worst)

    print("Requesting valid quotes for {} stocks".format(len(wanted_stocks)))
    self.qs = valid_quotes_only(mrd).filter(asx_code__in=wanted_stocks)
    if len(self.qs) < len(wanted_stocks):
        got = {quote.asx_code for quote in self.qs.all()}
        missing_stocks = wanted_stocks.difference(got)
        warning(self.request, f"could not obtain quotes for all stocks as at {mrd}: {missing_stocks}")
    return self.qs
def analyse_sector(stock, sector, all_stocks_cip, window_size=14):
    """
    Produce the sector-level plots for ``stock``.

    Args:
        stock: code of the stock being analysed (used as a plot group label).
        sector: sector name; a falsy value (eg. for ETFs) yields no plots.
        all_stocks_cip: dataframe which is filtered by stock code on its
            index and whose column labels are "%Y-%m-%d" date strings.
        window_size: passed through to plot_sector_performance().

    Returns:
        (c_vs_s_plot, sector_momentum_plot, point_score_plot); all three are
        None when the sector is falsy or has no stocks.
    """
    assert all_stocks_cip is not None
    sector_companies = all_sector_stocks(sector) if sector else []  # ETFs dont have a sector for now...
    if len(sector_companies) == 0:
        return None, None, None

    # not every sector stock need be present (may be missing due to delisted etc.)
    cip = all_stocks_cip.filter(items=sector_companies, axis='index').fillna(0.0)
    rows = []
    cum_sum = defaultdict(float)
    stock_versus_sector = []
    # identify the best performing stock in the sector and add it to the stock_versus_sector rows...
    best_stock_in_sector = cip.sum(axis=1).nlargest(1).index[0]
    for day in sorted(cip.columns, key=lambda k: datetime.strptime(k, "%Y-%m-%d")):
        # Series.items() is used as iteritems() no longer exists in pandas 2.x
        for asx_code, daily_change in cip[day].items():
            cum_sum[asx_code] += daily_change
        n_pos = sum(1 for total in cum_sum.values() if total >= 5.0)
        n_neg = sum(1 for total in cum_sum.values() if total < -5.0)
        n_unchanged = len(cip) - n_pos - n_neg
        rows.append({'n_pos': n_pos, 'n_neg': n_neg, 'n_unchanged': n_unchanged, 'date': day})
        # read via get(): indexing the defaultdict would insert a 0.0 entry for a
        # stock with no data and so distort the sector average computed below
        stock_versus_sector.append({'group': stock, 'date': day, 'value': cum_sum.get(stock, 0.0)})
        stock_versus_sector.append({'group': 'sector_average', 'date': day, 'value': pd.Series(cum_sum).mean()})
        if stock != best_stock_in_sector:
            stock_versus_sector.append({
                'group': '{} (best in {})'.format(best_stock_in_sector, sector),
                'value': cum_sum[best_stock_in_sector],
                'date': day,
            })

    df = pd.DataFrame.from_records(rows)
    sector_momentum_plot = plot_sector_performance(df, sector, window_size=window_size)
    stock_versus_sector_df = pd.DataFrame.from_records(stock_versus_sector)
    c_vs_s_plot = plot_company_versus_sector(stock_versus_sector_df, stock, sector)
    point_score_plot = analyse_point_scores(stock, sector_companies, all_stocks_cip)
    return c_vs_s_plot, sector_momentum_plot, point_score_plot
def show_sector_outliers(request, sector_id=None, n_days=30):
    """
    Show the outliers among the stocks of the sector with the given id.

    The user is validated first and sector_id must be a positive int. The
    sector's stocks are handed to show_outliers() along with n_days, and
    its result is returned.
    """
    validate_user(request.user)
    assert isinstance(sector_id, int) and sector_id > 0
    sector_record = Sector.objects.get(sector_id=sector_id)
    sector_stocks = all_sector_stocks(sector_record.sector_name)
    return show_outliers(request, sector_stocks, n_days=n_days)
def test_all_sectors(all_sector_fixture):
    """Check all_sectors() and the reverse sector -> stocks lookup agree with the fixture."""
    # since company_details_factory gives a single ANZ company details record, this test will work...
    expected_sectors = [('Financials', 'Financials')]
    assert all_sectors() == expected_sectors

    # and check the reverse is true: financials -> ANZ
    all_sector_stocks.cache_clear()  # drop any cached result before asking again
    financial_stocks = all_sector_stocks('Financials')
    assert financial_stocks == {'ANZ'}
def recalc_queryset(self, **kwargs):
    """
    Recompute the quotations to show for the submitted search parameters.

    The keys read from kwargs are "name", "activity", "sector",
    "sector_enabled", "report_top_n" and "report_bottom_n". Unless at least
    one of "name", "activity" or "sector" is present an empty queryset is
    returned. As side effects ``self.timeframe`` and ``self.ld`` (a
    LazyDictionary of sector data for later plotting) are set, and a
    warning is raised on the request when quotes are missing for some of
    the wanted stocks.

    Returns:
        the first element of latest_quote() for the matching companies.
    """
    if not any(key in kwargs for key in ("name", "activity", "sector")):
        return Quotation.objects.none()

    wanted_name = kwargs.get("name", "")
    wanted_activity = kwargs.get("activity", "")
    if len(wanted_name) > 0 or len(wanted_activity) > 0:
        matching_companies = find_named_companies(wanted_name, wanted_activity)
    else:
        matching_companies = all_stocks()

    sector = kwargs.get("sector", self.DEFAULT_SECTOR)
    sector_id = int(Sector.objects.get(sector_name=sector).sector_id)
    sector_stocks = all_sector_stocks(sector)
    if kwargs.get("sector_enabled", False):
        matching_companies = matching_companies.intersection(sector_stocks)
    print("Found {} companies matching: name={} or activity={}".format(
        len(matching_companies), wanted_name, wanted_activity))

    report_top_n = kwargs.get("report_top_n", None)
    report_bottom_n = kwargs.get("report_bottom_n", None)
    self.timeframe = Timeframe(past_n_days=90)

    ld = LazyDictionary()
    ld["sector"] = sector
    ld["sector_id"] = sector_id
    ld["sector_companies"] = sector_stocks
    if len(matching_companies) > 0:
        ld["cip_df"] = selected_cached_stocks_cip(matching_companies, self.timeframe)
    else:
        ld["cip_df"] = pd.DataFrame()
    ld["sector_performance_df"] = lambda ld: make_sector_performance_dataframe(
        ld["cip_df"], ld["sector_companies"])
    ld["sector_performance_plot"] = lambda ld: self.sector_performance(ld)
    self.ld = ld

    wanted_stocks = self.filter_top_bottom(ld, matching_companies, report_top_n, report_bottom_n)
    print("Requesting valid quotes for {} stocks".format(len(wanted_stocks)))
    quotations_as_at, actual_mrd = valid_quotes_only("latest", ensure_date_has_data=True)
    ret = quotations_as_at.filter(asx_code__in=wanted_stocks)
    if len(ret) < len(wanted_stocks):
        # compare against the quotes just fetched, not whatever an earlier
        # request left in self.qs, so the warning names the right stocks
        got = {quote.asx_code for quote in ret}
        missing_stocks = wanted_stocks.difference(got)
        warning(
            self.request,
            f"could not obtain quotes for all stocks as at {actual_mrd}: {missing_stocks}",
        )

    print("Showing results for {} companies".format(len(matching_companies)))
    # NOTE(review): the filtered quotations above only feed the warning; the
    # result covers every matching company - confirm the top/bottom N filter
    # is not meant to apply here too
    ret, _ = latest_quote(tuple(matching_companies))
    return ret
def get_dataset(dataset_wanted, request, timeframe=None):
    """
    Return the dataframe behind the named dataset.

    Args:
        dataset_wanted: one of "market_sentiment", "eps-per-sector",
            "kmeans-watchlist", "kmeans-etfs", "kmeans-sector-<sector id>",
            "stock-quotes-<stock>" or "financial-metrics-<stock>".
        request: only request.user is read (for the watchlist dataset).
        timeframe: period to report over; defaults to the past 300 days.

    Returns:
        a dataframe (financial_metrics() may yield None, which is passed on).

    Raises:
        Http404: the sector id is not a number, does not exist or has no name.
        ValueError: dataset_wanted has a known prefix but is not supported.
    """
    assert (
        dataset_wanted in {"market_sentiment", "eps-per-sector"}
        or dataset_wanted.startswith(("kmeans-", "financial-metrics-", "stock-quotes-"))
    )
    if timeframe is None:
        timeframe = Timeframe(past_n_days=300)

    if dataset_wanted == "market_sentiment":
        return cached_all_stocks_cip(timeframe)

    if dataset_wanted == "kmeans-watchlist":
        _, _, _, _, df = make_kmeans_cluster_dataframe(
            timeframe, 7, user_watchlist(request.user))
        return df

    if dataset_wanted == "kmeans-etfs":
        _, _, _, _, df = make_kmeans_cluster_dataframe(timeframe, 7, all_etfs())
        return df

    if dataset_wanted.startswith("stock-quotes-"):
        stock = dataset_wanted[len("stock-quotes-"):]
        validate_stock(stock)
        df = company_prices([stock],
                            timeframe=timeframe,
                            fields=all_stock_fundamental_fields,
                            missing_cb=None)
        df['stock_code'] = stock
        return df

    if dataset_wanted.startswith("kmeans-sector-"):
        # objects.get() raises rather than returning None, so a bad or unknown
        # id has to be caught here to give the intended 404
        try:
            sector_id = int(dataset_wanted[len("kmeans-sector-"):])
            sector = Sector.objects.get(sector_id=sector_id)
        except (ValueError, Sector.DoesNotExist) as err:
            raise Http404("No stocks associated with sector") from err
        if sector.sector_name is None:
            raise Http404("No stocks associated with sector")
        asx_codes = all_sector_stocks(sector.sector_name)
        _, _, _, _, df = make_kmeans_cluster_dataframe(timeframe, 7, asx_codes)
        return df

    if dataset_wanted.startswith("financial-metrics-"):
        stock = dataset_wanted[len("financial-metrics-"):]
        validate_stock(stock)
        df = financial_metrics(stock)
        if df is not None:
            # excel doesnt support timezones, so we remove it first
            df.columns = [d.strftime("%Y-%m-%d") for d in df.columns]
        return df

    if dataset_wanted == "eps-per-sector":
        # always the past 180 days: the timeframe argument is not used here
        df, _ = pe_trends_df(Timeframe(past_n_days=180))
        df = make_pe_trends_eps_df(df, stocks_by_sector())
        return df.set_index("asx_code", drop=True)

    raise ValueError("Unsupported dataset {}".format(dataset_wanted))
def test_all_sectors(
    all_sector_fixture,
):  # pylint: disable=unused-argument,redefined-outer-name
    """Check all_sectors() and the reverse sector -> stocks lookup agree with the fixture."""
    # since company_details_factory gives a single ANZ company details record, this test will work...
    expected_sectors = [("Financials", "Financials")]
    assert all_sectors() == expected_sectors

    # and check the reverse is true: financials -> ANZ
    # (both caches are cleared first so earlier results cannot leak in)
    for cached_function in (all_sector_stocks, stocks_by_sector):
        cached_function.cache_clear()
    financial_stocks = all_sector_stocks("Financials")
    assert financial_stocks == {"ANZ"}
def filter_stocks_to_search(request, what_to_search: str) -> set:
    """
    Resolve what_to_search into the stocks it denotes.

    "all_stocks" gives every stock and "watchlist" gives the watchlist of
    request.user; any other value is looked up as a sector name. Neither
    argument may be empty/None.
    """
    assert request is not None
    assert len(what_to_search) > 0

    if what_to_search == "all_stocks":
        return all_stocks()
    if what_to_search == "watchlist":
        return user_watchlist(request.user)
    # anything else is taken to be the name of a sector
    return all_sector_stocks(what_to_search)
def form_valid(self, form):
    """
    Render the sector box plot page for a valid form submission.

    Reads "sector", "normalisation_method" and "n_days" from the cleaned
    form data (defaulting to Communication Services, None and 30), plots
    the sector's stocks over the past n_days and renders the template with
    the plot and the winning stocks.
    """
    cleaned = form.cleaned_data
    sector = cleaned.get('sector', "Communication Services")
    norm_method = cleaned.get('normalisation_method', None)
    n_days = cleaned.get('n_days', 30)

    sector_stocks = all_sector_stocks(sector)
    cip = selected_cached_stocks_cip(sector_stocks, Timeframe(past_n_days=n_days))
    context = self.get_context_data()
    boxplot, winner_results = plot_boxplot_series(cip, normalisation_method=norm_method)

    page_title = "Past {} day sector performance: box plot trends".format(n_days)
    context.update({
        'title': page_title,
        'n_days': n_days,
        'sector': sector,
        'plot': boxplot,
        'winning_stocks': winner_results,
    })
    return render(self.request, self.template_name, context)
def form_valid(self, form):
    """
    Render the sector box plot page for a valid form submission.

    Reads "sector", "normalisation_method" and "n_days" from the cleaned
    form data (defaulting to Communication Services, None and 30). The
    sector data is registered in a LazyDictionary which is handed to
    cache_plot() to obtain the plot, and the winning stocks are computed
    from the same dataframe.
    """
    cleaned = form.cleaned_data
    sector = cleaned.get("sector", "Communication Services")
    norm_method = cleaned.get("normalisation_method", None)
    n_days = cleaned.get("n_days", 30)

    ld = LazyDictionary()
    ld["stocks"] = lambda ld: all_sector_stocks(sector)
    ld["timeframe"] = Timeframe(past_n_days=n_days)
    ld["cip_df"] = lambda ld: selected_cached_stocks_cip(
        ld["stocks"], ld["timeframe"])
    context = self.get_context_data()

    def winner_results(df: pd.DataFrame) -> list:
        # compute star performers: those who are above the mean on a given day counted over all days
        count = defaultdict(int)
        avg = df.mean(axis=0)
        for col in df.columns:
            above_average = df[col] > avg[col]
            for winner in df[above_average][col].index:
                count[winner] += 1

        positive_performers = []
        for asx_code, n_wins in count.items():
            total_change = df.loc[asx_code].sum()
            # avoid "dead cat bounce" stocks which fall spectacularly and then post major increases in percentage terms
            if total_change > 0.0:
                positive_performers.append((asx_code, n_wins, total_change))

        # ascending stable sort followed by a reversal: largest total first
        positive_performers.sort(key=lambda t: t[2])
        positive_performers.reverse()
        return positive_performers

    page_title = "Past {} day sector performance: box plot trends".format(n_days)
    plot_uri = cache_plot(
        f"{sector}-recent-sector-view-{ld['timeframe'].description}-{norm_method}",
        lambda ld: plot_boxplot_series(
            ld["cip_df"], normalisation_method=norm_method),
        datasets=ld,
    )
    context.update({
        "title": page_title,
        "n_days": n_days,
        "sector": sector,
        "plot_uri": plot_uri,
        "winning_stocks": winner_results(ld["cip_df"]),
    })
    return render(self.request, self.template_name, context)
def analyse_sector_performance(stock, sector, all_stocks_cip, window_size=10) -> tuple:
    """
    Analyse how ``stock`` performs relative to its sector.

    Returns (company versus sector plot, sector momentum plot, sector
    companies), or three Nones when sector is None (eg. an ETF for which no
    sector information is available).
    """
    assert isinstance(stock, str)
    assert isinstance(all_stocks_cip, pd.DataFrame)

    # not an ETF? ie. sector information available?
    if sector is None:
        return (None, None, None)

    sector_companies = all_sector_stocks(sector)
    plots = analyse_sector(
        stock, sector, sector_companies, all_stocks_cip, window_size=window_size)
    c_vs_s_plot, sector_momentum_plot = plots
    return c_vs_s_plot, sector_momentum_plot, sector_companies
def stocks(self):
    """Return the sorted stocks of self.sector, which is set to "Information Technology" if it was None."""
    if self.sector is None:
        self.sector = "Information Technology"
    sector_stocks = all_sector_stocks(self.sector)
    return sorted(sector_stocks)
def is_valid_sector(value):
    """Return True when at least one stock is found for the sector ``value`` (which must not be None)."""
    assert value is not None
    n_stocks = len(all_sector_stocks(value))
    return n_stocks > 0
def cluster_stocks_view(request, stocks: str):
    """
    Render the k-means clustering page for a group of stocks.

    ``stocks`` selects the group: "watchlist" (of request.user), "etfs" or
    "sector-<sector id>". Data covers the past 300 days and k is fixed at 7.

    Raises:
        Http404: the stock list is unknown, or the sector id is not a
            number, does not exist or has no name.

    ref: https://pythonforfinance.net/2018/02/08/stock-clusters-using-k-means-algorithm-in-python/
    """
    validate_user(request.user)
    timeframe = Timeframe(past_n_days=300)
    if stocks == "watchlist":
        asx_codes = user_watchlist(request.user)
    elif stocks == "etfs":
        asx_codes = all_etfs()
    elif stocks.startswith("sector-"):
        # objects.get() raises rather than returning None, so a bad or unknown
        # id has to be caught here to give the intended 404
        try:
            sector_id = int(stocks[len("sector-"):])
            sector = Sector.objects.get(sector_id=sector_id)
        except (ValueError, Sector.DoesNotExist) as err:
            raise Http404("No stocks associated with sector") from err
        if sector.sector_name is None:
            raise Http404("No stocks associated with sector")
        asx_codes = all_sector_stocks(sector.sector_name)
    else:
        raise Http404("Unknown stock list {}".format(stocks))

    chosen_k = 7  # often a reasonable tradeoff

    def elbow_curve_plot(ld: LazyDictionary):
        distortion, _, _, _, _ = make_kmeans_cluster_dataframe(
            timeframe, chosen_k, asx_codes
        )
        fig = plt.figure(figsize=(15, 5))
        # assumes distortion holds one value per k in 2..19 - TODO confirm
        plt.plot(range(2, 20), distortion)
        plt.grid(True)
        plt.title("Elbow curve")
        return fig

    def cluster_plot(ld: LazyDictionary):
        _, _, _, _, data_df = make_kmeans_cluster_dataframe(
            timeframe, chosen_k, asx_codes
        )
        plot = (
            p9.ggplot(
                data_df, p9.aes("return", "volatility", colour="factor(cluster_id)")
            )
            + p9.geom_point(size=3)
            + p9.facet_wrap("~cluster_id", ncol=3, scales="free")
        )
        return user_theme(
            plot,
            x_axis_label="Returns (%)",
            y_axis_label="Volatility (%)",
            figure_size=(15, 15),
            subplots_adjust={"hspace": 0.15, "wspace": 0.15},
        )

    # the cache keys carry the user and the exact stocks plotted
    stocks_as_str = "-".join(sorted(asx_codes))
    elbow_curve_uri = cache_plot(
        f"{request.user.username}-cluster-{stocks_as_str}-elbow-curve-plot",
        elbow_curve_plot,
    )
    cluster_uri = cache_plot(
        f"{request.user.username}-cluster-{stocks_as_str}-kmeans-cluster-plot",
        cluster_plot,
    )
    context = {
        "elbow_curve_plot_uri": elbow_curve_uri,
        "k": chosen_k,
        "dataset": stocks,
        "n_stocks": len(asx_codes),
        "cluster_plot_uri": cluster_uri,
        "timeframe": timeframe,
    }
    return render(request, "cluster_stocks.html", context=context)