Example #1
def test_stocks_by_sector(comp_deets):
    df = stocks_by_sector()
    assert df is not None
    assert isinstance(df, pd.DataFrame)
    assert len(df) == 1
    assert df.iloc[0].asx_code == 'ANZ'
    assert df.iloc[0].sector_name == 'Financials'
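This test only passes when the comp_deets fixture seeds exactly one company (ANZ in the Financials sector). A minimal sketch of what such a fixture might look like, assuming pytest-django and a hypothetical Security model (the real model name and import path may differ):

import pytest

@pytest.fixture
def comp_deets(db):  # "db" is pytest-django's database fixture
    # Hypothetical model import: substitute the project's actual company model.
    from app.models import Security
    Security.objects.create(asx_code="ANZ", sector_name="Financials")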
Example #2
def plot_breakdown(cip_df: pd.DataFrame):
    """Stacked bar plot of increasing and decreasing stocks per sector in the specified df"""
    cols_to_drop = [colname for colname in cip_df.columns if colname.startswith('bin_')]
    df = cip_df.drop(columns=cols_to_drop)
    df = pd.DataFrame(df.sum(axis='columns'), columns=['sum'])
    df = df.merge(stocks_by_sector(), left_index=True, right_on='asx_code')

    if len(df) == 0:  # no stocks in cip_df have a sector? e.g. all ETFs
        return None

    assert set(df.columns) == set(['sum', 'asx_code', 'sector_name'])
    df['increasing'] = df.apply(lambda row: 'up' if row['sum'] >= 0.0 else 'down', axis=1)
    sector_names = df['sector_name'].value_counts().index.tolist()  # order bars by frequency (value_counts() sorts descending)
    sector_names_cat = pd.Categorical(df['sector_name'], categories=sector_names)
    df = df.assign(sector_name_cat=sector_names_cat)

    #print(df)
    plot = (
        p9.ggplot(df, p9.aes(x='factor(sector_name_cat)', fill='factor(increasing)'))
        + p9.geom_bar()
        + p9.labs(x="Sector", y="Number of stocks")
        + p9.theme(axis_text_y=p9.element_text(size=7), 
                   subplots_adjust={"left": 0.2, 'right': 0.85},
                   legend_title=p9.element_blank()
                  )
        + p9.coord_flip()
    )
    return plot_as_inline_html_data(plot)
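A rough usage sketch, assuming a Django context in which stocks_by_sector() resolves ANZ to Financials (as in the tests above). Rows of cip_df are indexed by ASX code with one column per trading day of percentage changes:

import pandas as pd

# Hypothetical change-in-percent frame: index = ASX codes, columns = trading dates.
cip_df = pd.DataFrame(
    {"2021-01-04": [0.5, -1.2], "2021-01-05": [1.1, -0.3]},
    index=["ANZ", "CBA"],
)
html_img = plot_breakdown(cip_df)  # base64 <img> data, or None if no stock has a sector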
Example #3
def rank_cumulative_change(df: pd.DataFrame, timeframe: Timeframe):
    cum_sum = defaultdict(float)
    #print(df)
    for date in filter(lambda k: k in df.columns, timeframe.all_dates()):
        for code, price_change in df[date].fillna(0.0).items():  # .iteritems() was removed in pandas 2.0
            cum_sum[code] += price_change
        rank = pd.Series(cum_sum).rank(method='first', ascending=False)
        df[date] = rank

    all_available_dates = df.columns
    avgs = df.mean(axis=1)  # NB: do this BEFORE adding columns...
    assert len(avgs) == len(df)
    df['x'] = all_available_dates[-1]
    df['y'] = df[all_available_dates[-1]]

    bins = ['top', 'bin2', 'bin3', 'bin4', 'bin5', 'bottom']
    average_rank_binned = pd.cut(avgs, len(bins), labels=bins)  # labels must be passed by keyword
    assert len(average_rank_binned) == len(df)
    df['bin'] = average_rank_binned
    df['asx_code'] = df.index
    stock_sector_df = stocks_by_sector()  # make one DB call (cached) rather than lots of round-trips
    #print(stock_sector_df)
    stock_sector_df = stock_sector_df.set_index('asx_code')
    #print(df.index)
    # NB: reindex leaves NaN for stocks without a sector (e.g. ETFs) instead of raising KeyError
    df['sector'] = stock_sector_df['sector_name'].reindex(df.index).values
    df = pd.melt(df,
                 id_vars=['asx_code', 'bin', 'sector', 'x', 'y'],
                 var_name='date',
                 value_name='rank',
                 value_vars=all_available_dates)
    df['date'] = pd.to_datetime(df['date'], format="%Y-%m-%d")
    df['x'] = pd.to_datetime(df['x'], format="%Y-%m-%d")
    return df
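For reference, a self-contained sketch of the binning step above. pd.cut needs the labels passed by keyword: its third positional parameter is the right flag, not the label list:

import pandas as pd

avgs = pd.Series([1.0, 2.5, 3.1, 4.7, 5.2, 6.0], index=list("ABCDEF"))
bins = ['top', 'bin2', 'bin3', 'bin4', 'bin5', 'bottom']
binned = pd.cut(avgs, len(bins), labels=bins)
print(binned)  # each stock falls into one of six equal-width average-rank bands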
Example #4
def plot_market_wide_sector_performance(all_stocks_cip: pd.DataFrame):
    """
    Display average sector performance over the specified dates. Each company is assumed to start
    at zero at the beginning of the observation period. A plot as base64 data is returned.
    """
    n_stocks = len(all_stocks_cip)
    # merge in sector information for each company
    code_and_sector = stocks_by_sector()
    n_unique_sectors = len(code_and_sector["sector_name"].unique())
    print("Found {} unique sectors".format(n_unique_sectors))

    #print(df)
    #print(code_and_sector)
    df = all_stocks_cip.merge(code_and_sector, left_index=True, right_on="asx_code")
    print(
        "Found {} stocks, {} sectors and merged total: {}".format(
            n_stocks, len(code_and_sector), len(df)
        )
    )
    # accumulate each stock's percentage change across the dates; sector averages are taken below
    cumulative_pct_change = df.expanding(axis="columns").sum()
    # merge date-wise into df
    for date in cumulative_pct_change.columns:
        df[date] = cumulative_pct_change[date]
    # df.to_csv('/tmp/crap.csv')
    grouped_df = df.groupby("sector_name").mean()
    # grouped_df.to_csv('/tmp/crap.csv')

    # ready the dataframe for plotting
    grouped_df = pd.melt(
        grouped_df,
        ignore_index=False,
        var_name="date",
        value_name="cumulative_change_percent",
    )
    grouped_df["sector"] = grouped_df.index
    grouped_df["date"] = pd.to_datetime(grouped_df["date"])
    n_col = 3
    plot = (
        p9.ggplot(
            grouped_df, p9.aes("date", "cumulative_change_percent", color="sector")
        )
        + p9.geom_line(size=1.0)
        + p9.facet_wrap(
            "~sector", nrow=n_unique_sectors // n_col + 1, ncol=n_col, scales="free_y"
        )
        + p9.xlab("")
        + p9.ylab("Average sector change (%)")
        + p9.theme(
            axis_text_x=p9.element_text(angle=30, size=6),
            axis_text_y=p9.element_text(size=6),
            figure_size=(12, 6),
            panel_spacing=0.3,
            legend_position="none",
        )
    )
    return plot_as_inline_html_data(plot)
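The expanding sum across columns is just a row-wise cumulative sum; a tiny equivalence sketch (note that expanding(axis="columns") is deprecated in newer pandas, with cumsum(axis=1) as the drop-in replacement):

import pandas as pd

df = pd.DataFrame({"2021-01-04": [1.0, -0.5], "2021-01-05": [2.0, 0.5]},
                  index=["ANZ", "CBA"])
assert df.expanding(axis="columns").sum().equals(df.cumsum(axis=1))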
Example #5
def test_stocks_by_sector(
    comp_deets,
):  # pylint: disable=unused-argument,redefined-outer-name
    df = stocks_by_sector()
    assert df is not None
    assert isinstance(df, pd.DataFrame)

    assert len(df) == 1
    assert df.iloc[0].asx_code == "ANZ"
    assert df.iloc[0].sector_name == "Financials"
Example #6
def get_dataset(dataset_wanted, request, timeframe=None):
    assert (dataset_wanted in set(["market_sentiment", "eps-per-sector"])
            or dataset_wanted.startswith("kmeans-")
            or dataset_wanted.startswith("financial-metrics-")
            or dataset_wanted.startswith("stock-quotes-"))

    if timeframe is None:
        timeframe = Timeframe(past_n_days=300)

    if dataset_wanted == "market_sentiment":
        df = cached_all_stocks_cip(timeframe)
        return df
    elif dataset_wanted == "kmeans-watchlist":
        _, _, _, _, df = make_kmeans_cluster_dataframe(
            timeframe, 7, user_watchlist(request.user))
        return df
    elif dataset_wanted == "kmeans-etfs":
        _, _, _, _, df = make_kmeans_cluster_dataframe(timeframe, 7,
                                                       all_etfs())
        return df
    elif dataset_wanted.startswith("stock-quotes-"):
        stock = dataset_wanted[len("stock-quotes-"):]
        validate_stock(stock)
        df = company_prices([stock],
                            timeframe=timeframe,
                            fields=all_stock_fundamental_fields,
                            missing_cb=None)
        df['stock_code'] = stock
        return df
    elif dataset_wanted.startswith("kmeans-sector-"):
        sector_id = int(dataset_wanted[len("kmeans-sector-"):])
        sector = Sector.objects.filter(sector_id=sector_id).first()  # .get() would raise DoesNotExist rather than return None
        if sector is None or sector.sector_name is None:
            raise Http404("No stocks associated with sector")
        asx_codes = all_sector_stocks(sector.sector_name)
        _, _, _, _, df = make_kmeans_cluster_dataframe(timeframe, 7, asx_codes)
        return df
    elif dataset_wanted.startswith("financial-metrics-"):
        stock = dataset_wanted[len("financial-metrics-"):]
        validate_stock(stock)
        df = financial_metrics(stock)
        if df is not None:
            # Excel doesn't support timezones, so we remove them first
            colnames = [d.strftime("%Y-%m-%d") for d in df.columns]
            df.columns = colnames
            # FALLTHRU
        return df
    elif dataset_wanted == "eps-per-sector":
        df, _ = pe_trends_df(Timeframe(past_n_days=180))
        df = make_pe_trends_eps_df(df, stocks_by_sector())
        df = df.set_index("asx_code", drop=True)
        return df
    else:
        raise ValueError("Unsupported dataset {}".format(dataset_wanted))
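A rough usage sketch (hypothetical view code): the dataset name encodes both the dataset type and, for the prefixed forms, its argument:

# e.g. inside a Django view, with a `request` in scope:
quotes_df = get_dataset("stock-quotes-ANZ", request)        # per-stock quote history
metrics_df = get_dataset("financial-metrics-ANZ", request)  # per-stock fundamentals
eps_df = get_dataset("eps-per-sector", request, timeframe=Timeframe(past_n_days=90))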
Example #7
def plot_market_wide_sector_performance(all_dates,
                                        field_name='change_in_percent'):
    """
    Display average sector performance over the specified dates. Each company is assumed to start
    at zero at the beginning of the observation period. A plot as base64 data is returned.
    """
    df = company_prices(None, all_dates=all_dates,
                        fields=field_name)  # None == all stocks
    n_stocks = len(df)
    # merge in sector information for each company
    code_and_sector = stocks_by_sector()
    n_unique_sectors = len(code_and_sector['sector_name'].unique())
    print("Found {} unique sectors".format(n_unique_sectors))

    #print(code_and_sector)
    df = df.merge(code_and_sector, left_on='asx_code', right_on='asx_code')
    print("Found {} stocks, {} sectors and merged total: {}".format(
        n_stocks, len(code_and_sector), len(df)))
    # accumulate each stock's percentage change across the dates; sector averages are taken below
    cumulative_pct_change = df.expanding(axis='columns').sum()
    # merge date-wise into df
    for date in cumulative_pct_change.columns:
        df[date] = cumulative_pct_change[date]
    #df.to_csv('/tmp/crap.csv')
    grouped_df = df.groupby('sector_name').mean()
    #grouped_df.to_csv('/tmp/crap.csv')

    # ready the dataframe for plotting
    grouped_df = pd.melt(grouped_df,
                         ignore_index=False,
                         var_name='date',
                         value_name='cumulative_change_percent')
    grouped_df['sector'] = grouped_df.index
    grouped_df['date'] = pd.to_datetime(grouped_df['date'])
    n_col = 3
    plot = (
        p9.ggplot(grouped_df,
                  p9.aes('date', 'cumulative_change_percent', color='sector'))
        + p9.geom_line(size=1.0)
        + p9.facet_wrap('~sector',
                        nrow=n_unique_sectors // n_col + 1,
                        ncol=n_col,
                        scales='free_y')
        + p9.xlab('')
        + p9.ylab('Average sector change (%)')
        + p9.theme(axis_text_x=p9.element_text(angle=30, size=6),
                   axis_text_y=p9.element_text(size=6),
                   figure_size=(12, 6),
                   panel_spacing=0.3,
                   legend_position='none')
    )
    return plot_as_inline_html_data(plot)
Example #8
    def data_factory(df: pd.DataFrame) -> pd.DataFrame:
        df = df.pivot(
            index=["asx_code", "fetch_date"], columns="field_name", values="field_value"
        )
        required = (df.number_of_shares > 0) & (df.eps > 0.0)
        df = df[required]  # ignore stocks which have unknowns
        # print(df)
        df["total_earnings"] = df["eps"] * df["number_of_shares"]
        df = df.dropna(how="any", axis=0)
        df = df.reset_index()
        df = df.pivot(index="asx_code", columns="fetch_date", values="total_earnings")
        df = df.merge(stocks_by_sector(), left_index=True, right_on="asx_code")
        df = df.set_index("asx_code", drop=True)
        df = df.groupby("sector_name").sum()
        df["sector_name"] = df.index
        df = df.melt(id_vars="sector_name", var_name="fetch_date")
        assert set(df.columns) == set(["sector_name", "fetch_date", "value"])
        df["fetch_date"] = pd.to_datetime(df["fetch_date"], format="%Y-%m-%d")

        return df
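A self-contained sketch of the first pivot this factory relies on: long per-field records become a wide frame keyed by (asx_code, fetch_date), after which eps and number_of_shares line up for the earnings product:

import pandas as pd

long_df = pd.DataFrame({
    "asx_code": ["ANZ", "ANZ", "CBA", "CBA"],
    "fetch_date": ["2021-01-04"] * 4,
    "field_name": ["eps", "number_of_shares"] * 2,
    "field_value": [2.0, 1_000_000.0, 4.5, 2_000_000.0],
})
wide = long_df.pivot(index=["asx_code", "fetch_date"],
                     columns="field_name", values="field_value")
print(wide["eps"] * wide["number_of_shares"])  # total earnings per (stock, date)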
Example #9
def rank_cumulative_change(df: pd.DataFrame, timeframe: Timeframe):
    cum_sum = defaultdict(float)
    # print(df)
    for date in filter(lambda k: k in df.columns, timeframe.all_dates()):
        for code, price_change in df[date].fillna(0.0).items():  # .iteritems() was removed in pandas 2.0
            cum_sum[code] += price_change
        rank = pd.Series(cum_sum).rank(method="first", ascending=False)
        df[date] = rank

    all_available_dates = df.columns
    avgs = df.mean(axis=1)  # NB: do this BEFORE adding columns...
    assert len(avgs) == len(df)
    df["x"] = all_available_dates[-1]
    df["y"] = df[all_available_dates[-1]]

    bins = ["top", "bin2", "bin3", "bin4", "bin5", "bottom"]
    average_rank_binned = pd.cut(avgs, len(bins), labels=bins)  # labels must be passed by keyword
    assert len(average_rank_binned) == len(df)
    df["bin"] = average_rank_binned
    df["asx_code"] = df.index
    stock_sector_df = stocks_by_sector()  # make one DB call (cached) rather than lots of round-trips
    # print(stock_sector_df)
    stock_sector_df = stock_sector_df.set_index("asx_code")
    # print(df.index)
    df = df.merge(
        stock_sector_df, left_index=True, right_on="asx_code"
    )  # NB: this merge loses rows for stocks without a sector, e.g. ETFs
    df = pd.melt(
        df,
        id_vars=["asx_code", "bin", "sector_name", "x", "y"],
        var_name="date",
        value_name="rank",
        value_vars=all_available_dates,
    )
    df["date"] = pd.to_datetime(df["date"], format="%Y-%m-%d")
    df["x"] = pd.to_datetime(df["x"], format="%Y-%m-%d")
    return df
Example #10
def show_pe_trends(request):
    """
    Display a plot of per-sector PE trends across stocks in each sector
    ref: https://www.commsec.com.au/education/learn/choosing-investments/what-is-price-to-earnings-pe-ratio.html
    """
    validate_user(request.user)
    timeframe = Timeframe(past_n_days=180)
    pe_df = company_prices(None,
                           timeframe,
                           fields="pe",
                           missing_cb=None,
                           transpose=True)
    eps_df = company_prices(None,
                            timeframe,
                            fields="eps",
                            missing_cb=None,
                            transpose=True)
    ss = stocks_by_sector()
    ss_dict = {row.asx_code: row.sector_name for row in ss.itertuples()}
    #print(ss_dict)
    eps_stocks = set(eps_df.index)
    n_stocks = len(pe_df)
    positive_pe_stocks = set(pe_df[pe_df.sum(axis=1) > 0.0].index)
    all_stocks = set(pe_df.index)
    n_non_zero_sum = len(positive_pe_stocks)
    #print(exclude_zero_sum)
    records = []
    trading_dates = set(pe_df.columns)

    sector_counts_all_stocks = ss['sector_name'].value_counts()
    all_sectors = set(ss['sector_name'].unique())
    pe_pos_df = pe_df.filter(items=positive_pe_stocks, axis=0).merge(
        ss, left_index=True, right_on='asx_code')
    assert len(pe_pos_df) <= len(positive_pe_stocks) and len(pe_pos_df) > 0
    market_avg_pe_df = pe_pos_df.mean(axis=0).to_frame(
        name='market_pe')  # avg P/E by date series
    market_avg_pe_df['date'] = pd.to_datetime(market_avg_pe_df.index)
    #print(market_avg_pe_df)
    breakdown_by_sector_pe_pos_stocks_only = pe_pos_df['sector_name'].value_counts()
    #print(breakdown_by_sector_pe_pos_stocks_only)
    sector_counts_pe_pos_stocks_only = breakdown_by_sector_pe_pos_stocks_only.to_dict()
    #print(sector_counts_pe_pos_stocks_only)
    #print(sector_counts_all_stocks)
    #print(sector_counts_pe_pos_stocks_only)
    # restrict to trading dates to avoid KeyError raised during the DataFrame.at[] calls below
    for ymd in filter(lambda d: d in trading_dates, timeframe.all_dates()):
        sum_pe_per_sector = defaultdict(float)
        sum_eps_per_sector = defaultdict(float)

        for stock in filter(lambda code: code in ss_dict, all_stocks):
            sector = ss_dict[stock]
            assert isinstance(sector, str)

            if stock in eps_stocks:
                eps = eps_df.at[stock, ymd]
                if isnan(eps):
                    continue
                sum_eps_per_sector[sector] += eps

            if stock in positive_pe_stocks:
                pe = pe_df.at[stock, ymd]
                if isnan(pe):
                    continue
                assert pe >= 0.0
                sum_pe_per_sector[sector] += pe

        #print(sum_pe_per_sector)
        assert len(sector_counts_pe_pos_stocks_only) == len(sum_pe_per_sector)
        assert len(sector_counts_all_stocks) == len(sum_eps_per_sector)
        for sector in all_sectors:
            pe_sum = sum_pe_per_sector.get(sector, None)
            n_pe = sector_counts_pe_pos_stocks_only.get(sector, None)
            pe_mean = pe_sum / n_pe if pe_sum is not None else None
            eps_sum = sum_eps_per_sector.get(sector, None)

            records.append({
                'date': ymd,
                'sector': sector,
                'mean_pe': pe_mean,
                'sum_pe': pe_sum,
                'sum_eps': eps_sum,
                'n_stocks': n_stocks,
                'n_sector_stocks_pe_only': n_pe
            })

    df = pd.DataFrame.from_records(records)
    #print(df[df["sector"] == 'Utilities'])
    #print(df)
    context = {
        "title": "PE Trends: {}".format(timeframe.description),
        "n_stocks": n_stocks,
        "timeframe": timeframe,
        "n_stocks_with_pe": n_non_zero_sum,
        "sector_pe_plot": plot_sector_field(df, field="mean_pe"),
        "sector_eps_plot": plot_sector_field(df, field="sum_eps"),
        "market_pe_plot": plot_series(market_avg_pe_df,
                                      x='date',
                                      y='market_pe',
                                      y_axis_label="Market-wide mean P/E",
                                      color=None,
                                      use_smooth_line=True),
    }
    return render(request, "pe_trends.html", context)
Example #11
def show_companies(
    matching_companies,  # may be QuerySet or iterable of stock codes (str)
    request,
    sentiment_timeframe: Timeframe,
    extra_context=None,
    template_name="all_stocks.html",
):
    """
    Support function to public-facing views to eliminate code redundancy
    """
    if isinstance(matching_companies, QuerySet):
        stocks_queryset = matching_companies  # we assume QuerySet is already sorted by desired criteria
    elif matching_companies is None or len(matching_companies) > 0:
        stocks_queryset, _ = latest_quote(matching_companies)
        # FALLTHRU
    else:
        # no companies to report?
        warning(request, "No matching companies.")
        return render(request,
                      template_name,
                      context={"timeframe": sentiment_timeframe})

    # prune companies without a latest price, makes no sense to report them
    stocks_queryset = stocks_queryset.exclude(last_price__isnull=True)

    # sort the queryset by the user-requested criteria (the sort_by query parameter)
    arg = request.GET.get("sort_by", "asx_code")
    #info(request, "Sorting by {}".format(arg))

    if arg == "sector" or arg == "sector,-eps":
        ss = {
            s["asx_code"]: s["sector_name"]
            for s in stocks_by_sector().to_dict("records")
        }
        if arg == "sector":
            # companies without a sector sort last
            stocks_queryset = sorted(stocks_queryset,
                                     key=lambda s: ss.get(s.asx_code, "Z"))
        else:
            eps_dict = {
                s.asx_code: s.eps if s.eps is not None else 0.0
                for s in stocks_queryset
            }
            stocks_queryset = sorted(
                stocks_queryset,
                key=lambda s:
                (ss.get(s.asx_code, "Z"), -eps_dict.get(s.asx_code, 0.0)),
            )
    else:
        sort_by = tuple(arg.split(","))
        stocks_queryset = stocks_queryset.order_by(*sort_by)

    # keep track of stock codes for template convenience
    asx_codes = [quote.asx_code for quote in stocks_queryset]
    n_top_bottom = (extra_context or {}).get("n_top_bottom", 20)  # extra_context may be None
    print("show_companies: found {} stocks".format(len(asx_codes)))

    # setup context dict for the render
    context = {
        # NB: title and heatmap_title are expected to be supplied by caller via extra_context
        "timeframe": sentiment_timeframe,
        "title": "Caller must override",
        "watched": user_watchlist(request.user),
        "n_stocks": len(asx_codes),
        "n_top_bottom": n_top_bottom,
        "virtual_purchases": user_purchases(request.user),
    }

    # since we sort above, we must setup the pagination also...
    # assert isinstance(stocks_queryset, QuerySet)
    paginator = Paginator(stocks_queryset, 50)
    page_number = request.GET.get("page", 1)
    page_obj = paginator.page(page_number)
    context["page_obj"] = page_obj
    context["object_list"] = paginator

    # compute totals across all dates for the specified companies to look at top10/bottom10 in the timeframe
    ld = LazyDictionary()
    ld["cip_df"] = lambda ld: selected_cached_stocks_cip(
        asx_codes, sentiment_timeframe)
    ld["sum_by_company"] = lambda ld: ld["cip_df"].sum(axis=1,
                                                       numeric_only=True)
    ld["top10"] = lambda ld: ld["sum_by_company"].nlargest(n_top_bottom)
    ld["bottom10"] = lambda ld: ld["sum_by_company"].nsmallest(n_top_bottom)
    ld["stocks_by_sector"] = lambda ld: stocks_by_sector()

    if len(asx_codes) <= 0 or len(ld["top10"]) <= 0:
        warning(request, "No matching companies found.")
    else:
        sorted_codes = "-".join(sorted(asx_codes))
        sentiment_heatmap_uri = cache_plot(
            f"{sorted_codes}-{sentiment_timeframe.description}-stocks-sentiment-plot",
            lambda ld: plot_heatmap(sentiment_timeframe, ld),
            datasets=ld,
        )

        key = f"{sorted_codes}-{sentiment_timeframe.description}-breakdown-plot"
        sector_breakdown_uri = cache_plot(key, plot_breakdown, datasets=ld)

        top10_plot_uri = cache_plot(
            f"top10-plot-{'-'.join(ld['top10'].index)}",
            lambda ld: plot_cumulative_returns(ld["top10"].index, ld),
            datasets=ld,
        )
        bottom10_plot_uri = cache_plot(
            f"bottom10-plot-{'-'.join(ld['bottom10'].index)}",
            lambda ld: plot_cumulative_returns(ld["bottom10"].index, ld),
            datasets=ld,
        )

        context.update({
            "best_ten": ld["top10"],
            "worst_ten": ld["bottom10"],
            "sentiment_heatmap_uri": sentiment_heatmap_uri,
            "sentiment_heatmap_title": "{}: {}".format(context["title"],
                                                       sentiment_timeframe.description),
            "sector_breakdown_uri": sector_breakdown_uri,
            "top10_plot_uri": top10_plot_uri,
            "bottom10_plot_uri": bottom10_plot_uri,
            "timeframe_end_performance": timeframe_end_performance(ld),
        })

    if extra_context:
        context.update(extra_context)
    add_messages(request, context)
    # print(context)
    return render(request, template_name, context=context)
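show_companies leans heavily on LazyDictionary so each dataset is computed at most once, and only if a plot actually needs it. A minimal sketch of the pattern as used here (an assumption about the real class, not its code): values may be callables taking the dictionary itself, and are replaced by their result on first access:

class LazyDictionary(dict):
    def __getitem__(self, key):
        value = dict.__getitem__(self, key)
        if callable(value):
            value = value(self)                 # compute on first access...
            dict.__setitem__(self, key, value)  # ...then memoize in place
        return value

ld = LazyDictionary()
ld["expensive"] = lambda ld: sum(range(10**6))
print(ld["expensive"])  # computed here; subsequent lookups hit the cached value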
Example #12
def detect_outliers(stocks: list, all_stocks_cip: pd.DataFrame, rules=None):
    """
    Returns the list of stock codes flagged as outliers by the provided rules.
    all_stocks_cip is the "change in percent" dataframe covering at least the specified stocks.
    """
    if rules is None:
        rules = default_point_score_rules()
    str_rules = {str(r): r for r in rules}
    rows = []
    stocks_by_sector_df = stocks_by_sector()  # NB: ETFs in watchlist will have no sector
    stocks_by_sector_df.index = stocks_by_sector_df["asx_code"]
    for stock in stocks:
        # print("Processing stock: ", stock)
        try:
            sector = stocks_by_sector_df.at[stock, "sector_name"]
            sector_companies = list(stocks_by_sector_df.loc[
                stocks_by_sector_df["sector_name"] == sector].asx_code)
            # day_low_high() may raise KeyError when data is currently being fetched, so it appears here...
            day_low_high_df = day_low_high(stock, all_stocks_cip.columns)
        except KeyError:
            warning(
                None,
                "Unable to locate watchlist entry: {} - continuing without it".format(stock),
            )
            continue
        state = {
            "day_low_high_df": day_low_high_df,  # never changes each day, so we init it here
            "all_stocks_change_in_percent_df": all_stocks_cip,
            "stock": stock,
            "daily_range_threshold": 0.20,  # 20% at either end of the daily range gets a point
        }
        points_by_rule = defaultdict(int)
        for date in all_stocks_cip.columns:
            market_avg = all_stocks_cip[date].mean()
            sector_avg = all_stocks_cip[date].filter(items=sector_companies).mean()
            stock_move = all_stocks_cip.at[stock, date]
            state.update({
                "market_avg": market_avg,
                "sector_avg": sector_avg,
                "stock_move": stock_move,
                "date": date,
            })
            for rule_name, rule in str_rules.items():
                try:
                    points_by_rule[rule_name] += rule(state)
                except TypeError:  # handle nan's in dataset safely
                    pass
        d = {"stock": stock}
        d.update(points_by_rule)
        rows.append(d)
    df = pd.DataFrame.from_records(rows)
    df = df.set_index("stock")
    # print(df)
    clf = IForest()
    clf.fit(df)
    scores = clf.predict(df)
    results = [row[0] for row, value in zip(df.iterrows(), scores) if value > 0]  # row[0] is the stock code (index)
    # print(results)
    print("Found {} outlier stocks".format(len(results)))
    return results
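A self-contained sketch of the scoring step, assuming pyod's IForest (whose predict() returns 1 for outliers and 0 for inliers):

import pandas as pd
from pyod.models.iforest import IForest

points = pd.DataFrame({"rule_a": [1, 2, 1, 30], "rule_b": [0, 1, 0, 25]},
                      index=["ANZ", "CBA", "NAB", "XYZ"])
clf = IForest()
clf.fit(points)
labels = clf.predict(points)  # 1 == outlier
print(points.index[labels > 0].tolist())  # likely ['XYZ'], given its extreme point scores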
Example #13
def market_sentiment(request, n_days=21, n_top_bottom=20, sector_n_days=365):
    validate_user(request.user)
    assert n_days > 0
    assert n_top_bottom > 0

    def market_cap_data_factory(ld: LazyDictionary) -> pd.DataFrame:
        dates = ld["sector_timeframe"].all_dates()
        # print(dates)
        assert len(dates) > 90
        result_df = None
        adjusted_dates = []
        for the_date in [dates[0], dates[-1], dates[-30], dates[-90]]:
            print(f"Before valid_quotes_only for {the_date}")
            quotes, actual_trading_date = valid_quotes_only(
                the_date, ensure_date_has_data=True)
            print(f"After valid_quotes_only for {the_date}")
            print(f"Before make quotes {actual_trading_date}")
            print(len(quotes))
            df = make_quote_df(quotes, ld["asx_codes"], actual_trading_date)
            print("After make_quote_df")
            result_df = df if result_df is None else pd.concat([result_df, df])  # DataFrame.append was removed in pandas 2.0
            if the_date != actual_trading_date:
                adjusted_dates.append(the_date)

        if len(adjusted_dates) > 0:
            warning(
                request,
                "Some dates were not trading days, adjusted: {}".format(
                    adjusted_dates),
            )
        return result_df

    ld = LazyDictionary()
    ld["asx_codes"] = lambda ld: all_stocks()
    ld["sector_timeframe"] = lambda ld: Timeframe(past_n_days=sector_n_days)
    ld["timeframe"] = lambda ld: Timeframe(past_n_days=n_days)
    ld["sector_df"] = lambda ld: cached_all_stocks_cip(ld["sector_timeframe"])
    ld["sector_cumsum_df"] = lambda ld: ld["sector_df"].cumsum(axis=1)
    ld["cip_df"] = lambda ld: ld["sector_df"].filter(
        items=ld["timeframe"].all_dates(), axis=1)
    ld["market_cap_df"] = lambda ld: market_cap_data_factory(ld)
    ld["stocks_by_sector"] = lambda ld: stocks_by_sector()

    sentiment_plot = cache_plot(
        f"market-sentiment-{ld['timeframe'].description}",
        lambda ld: plot_heatmap(ld["timeframe"], ld),
        datasets=ld,
    )
    sector_descr = ld["sector_timeframe"].description
    sector_performance_plot = cache_plot(
        f"sector-performance-{sector_descr}",
        lambda ld: plot_market_wide_sector_performance(ld),
        datasets=ld,
    )
    market_cap_dist_plot = cache_plot(
        f"market-cap-dist-{sector_descr}",
        lambda ld: plot_market_cap_distribution(ld),
        datasets=ld,
    )

    df = ld["sector_cumsum_df"].transpose()
    df.index = pd.to_datetime(df.index, format="%Y-%m-%d")
    df = (df.resample("BM", ).asfreq().diff(periods=1))
    ld["monthly_returns_by_stock"] = df
    # print(df)

    context = {
        "sentiment_uri": sentiment_plot,
        "n_days": ld["timeframe"].n_days,
        "n_stocks_plotted": len(ld["asx_codes"]),
        "n_top_bottom": n_top_bottom,
        "watched": user_watchlist(request.user),
        "sector_performance_uri": sector_performance_plot,
        "sector_timeframe": ld["sector_timeframe"],
        "sector_performance_title": "Cumulative sector avg. performance: {}".format(
            ld["sector_timeframe"].description),
        "title": "Market sentiment",
        "market_cap_distribution_uri": market_cap_dist_plot,
        "monthly_sector_mean_returns": plot_sector_monthly_mean_returns(ld),
    }
    return render(request, "market_sentiment_view.html", context=context)
Example #14
def show_pe_trends(request):
    """
    Display a plot of per-sector PE trends across stocks in each sector
    ref: https://www.commsec.com.au/education/learn/choosing-investments/what-is-price-to-earnings-pe-ratio.html
    """
    validate_user(request.user)

    def make_pe_trends_market_avg_df(ld: LazyDictionary) -> pd.DataFrame:
        df = ld["data_df"]
        ss = ld["stocks_by_sector"]
        pe_pos_df, _ = make_pe_trends_positive_pe_df(df, ss)
        market_avg_pe_df = pe_pos_df.mean(axis=0, numeric_only=True).to_frame(
            name="market_pe")  # avg P/E by date series
        market_avg_pe_df["date"] = pd.to_datetime(market_avg_pe_df.index)
        return market_avg_pe_df

    def sector_eps_data_factory(ld: LazyDictionary) -> pd.DataFrame:
        df = ld["data_df"]
        n_stocks = df["asx_code"].nunique()
        pe_df, positive_pe_stocks = ld["positive_pe_tuple"]
        eps_df = ld["eps_df"]
        ss = ld["stocks_by_sector"]

        # print(positive_pe_stocks)
        eps_stocks = set(eps_df.index)
        ss_dict = {row.asx_code: row.sector_name for row in ss.itertuples()}
        # print(ss_dict)

        trading_dates = set(pe_df.columns)
        trading_dates.remove("sector_name")
        sector_counts_all_stocks = ss["sector_name"].value_counts()
        all_sectors = set(ss["sector_name"].unique())
        breakdown_by_sector_pe_pos_stocks_only = pe_df["sector_name"].value_counts()
        # print(breakdown_by_sector_pe_pos_stocks_only)
        sector_counts_pe_pos_stocks_only = breakdown_by_sector_pe_pos_stocks_only.to_dict()
        # print(sector_counts_pe_pos_stocks_only)
        # print(sector_counts_all_stocks)
        # print(sector_counts_pe_pos_stocks_only)
        records = []
        # restrict to trading dates to avoid KeyError raised during DataFrame.at[] calls below
        for ymd in filter(lambda d: d in trading_dates, ld["timeframe"].all_dates()):
            sum_pe_per_sector = defaultdict(float)
            sum_eps_per_sector = defaultdict(float)

            for stock in filter(lambda code: code in ss_dict,
                                positive_pe_stocks):
                sector = ss_dict[stock]
                assert isinstance(sector, str)

                if stock in eps_stocks:
                    eps = eps_df.at[stock, ymd]
                    if isnan(eps):
                        continue
                    sum_eps_per_sector[sector] += eps

                if stock in positive_pe_stocks:
                    pe = pe_df.at[stock, ymd]
                    if isnan(pe):
                        continue
                    assert pe >= 0.0
                    sum_pe_per_sector[sector] += pe

            # print(len(sector_counts_all_stocks))
            # print(len(sum_eps_per_sector))
            assert len(sector_counts_pe_pos_stocks_only) >= len(
                sum_pe_per_sector)
            assert len(sector_counts_all_stocks) >= len(sum_eps_per_sector)
            for sector in all_sectors:
                pe_sum = sum_pe_per_sector.get(sector, None)
                n_pe = sector_counts_pe_pos_stocks_only.get(sector, None)
                pe_mean = pe_sum / n_pe if pe_sum is not None else None
                eps_sum = sum_eps_per_sector.get(sector, None)

                records.append({
                    "date": ymd,
                    "sector": sector,
                    "mean_pe": pe_mean,
                    "sum_pe": pe_sum,
                    "sum_eps": eps_sum,
                    "n_stocks": n_stocks,
                    "n_sector_stocks_pe_only": n_pe,
                })
        df = pd.DataFrame.from_records(records)
        # print(df[df["sector"] == 'Utilities'])
        # print(df)
        return df

    ld = LazyDictionary()
    ld["data_df"] = lambda ld: pe_trends_df(ld["timeframe"])
    ld["positive_pe_tuple"] = lambda ld: make_pe_trends_positive_pe_df(
        ld["data_df"], ld["stocks_by_sector"])
    ld["market_avg_pe_df"] = lambda ld: make_pe_trends_market_avg_df(ld)
    ld["eps_df"] = lambda ld: make_pe_trends_eps_df(ld["data_df"])
    ld["sector_eps_df"] = lambda ld: sector_eps_data_factory(ld)
    ld["stocks_by_sector"] = stocks_by_sector()
    ld["timeframe"] = Timeframe(past_n_days=180)
    td = ld["timeframe"].description

    # these aren't per-user plots: they can safely be shared across all users of the site, so the cache keys reflect that
    sector_pe_cache_key = f"{td}-by-sector-pe-plot"
    sector_eps_cache_key = f"{td}-by-sector-eps-plot"
    market_pe_cache_key = f"{td}-market-pe-mean"
    market_pe_plot_uri = cache_plot(
        market_pe_cache_key,
        lambda ld: plot_series(
            ld["market_avg_pe_df"],
            x="date",
            y="market_pe",
            y_axis_label="Market-wide mean P/E",
            color=None,
            use_smooth_line=True,
        ),
        datasets=ld,
    )

    context = {
        "title": "PE Trends",
        "n_stocks": ld["data_df"]["asx_code"].nunique(),
        "timeframe": ld["timeframe"],
        "n_stocks_with_pe": len(ld["positive_pe_tuple"][1]),
        "sector_pe_plot_uri": cache_plot(
            sector_pe_cache_key,
            lambda ld: plot_sector_field(ld["sector_eps_df"], field="mean_pe"),
            datasets=ld,
        ),
        "sector_eps_plot_uri": cache_plot(
            sector_eps_cache_key,
            lambda ld: plot_sector_field(ld["sector_eps_df"], field="sum_eps"),
            datasets=ld,
        ),
        "market_pe_plot_uri": market_pe_plot_uri,
        "sector_positive_top_contributors_eps_uri": cache_plot(
            f"top-contributors-{sector_eps_cache_key}",
            lambda ld: plot_sector_top_eps_contributors(ld["eps_df"],
                                                        ld["stocks_by_sector"]),
            datasets=ld,
        ),
    }
    return render(request, "pe_trends.html", context)