Example #1
0
def test_company_prices(quotation_fixture, monkeypatch):
    """Check the frame returned by company_prices() for two stocks over six days.

    ``mdl.make_superdf`` is monkeypatched with canned data, so only the
    shaping done by company_prices() is under test: the orientation produced
    by ``transpose=True`` and the handling of missing prices with and without
    ``missing_cb=None``.
    """
    codes = ('ABC', 'OTHER')
    expected_dates = ['2021-01-01', '2021-01-02', '2021-01-03',
                      '2021-01-04', '2021-01-05', '2021-01-06']
    monkeypatch.setattr(mdl, 'make_superdf', mock_superdf_all_stocks)
    six_days = Timeframe(from_date='2021-01-01', n=6)

    # basic check: one row per stock, one column per date, gaps left as NaN
    raw_df = company_prices(list(codes),
                            six_days,
                            fields='last_price',
                            missing_cb=None,
                            transpose=True)
    assert isinstance(raw_df, pd.DataFrame)
    assert len(raw_df) == 2
    assert list(raw_df.index) == list(codes)
    assert list(raw_df.columns) == expected_dates
    other_is_nan = list(np.isnan(raw_df.loc['OTHER']))
    assert other_is_nan == [False] + [True] * 5

    # check impute missing functionality: default missing_cb is left in place
    imputed_df = company_prices(list(codes),
                                six_days,
                                fields='last_price',
                                transpose=True)
    assert list(imputed_df.loc['OTHER']) == [0.0] * 6

    # finally check that a multi-field DataFrame is as requested
    monkeypatch.setattr(mdl, 'make_superdf', mock_superdf_many_fields)
Example #2
0
def pe_trends_df(timeframe: Timeframe) -> pd.DataFrame:
    """Return the "pe", "eps" and "number_of_shares" data for ``timeframe``.

    All fields needed by the view are requested in a single company_prices()
    call because that is more efficient on the DB. ``None`` is passed as the
    stock selection (presumably meaning "every stock" - confirm against
    company_prices()) and ``missing_cb=None`` is passed through unchanged.
    """
    wanted_fields = ["pe", "eps", "number_of_shares"]
    return company_prices(None,
                          timeframe,
                          fields=wanted_fields,
                          missing_cb=None)
Example #3
0
def make_kmeans_cluster_dataframe(timeframe: Timeframe, chosen_k: int,
                                  stocks: Iterable[str]) -> tuple:
    """Cluster stocks by mean return and volatility of their price changes.

    Args:
        timeframe: period over which "last_price" is fetched.
        chosen_k: number of clusters used for the final assignment.
        stocks: stock codes handed to company_prices().

    Returns:
        A 5-tuple ``(distortion, chosen_k, centroids, idx, data_df)``:
        ``distortion`` is the list of ``KMeans.inertia_`` values for
        k = 2..19, ``centroids`` and ``idx`` come from ``kmeans()``/``vq()``
        run with ``chosen_k``, and ``data_df`` holds the "return",
        "volatility" and "cluster_id" columns, one row per surviving stock.
    """
    prices_df = company_prices(stocks, timeframe, fields="last_price")

    # compute the period-on-period changes once and reuse them for both stats
    pct_changes = prices_df.pct_change()
    # scaled by 252 / sqrt(252) - presumably trading days per year, i.e.
    # annualised figures
    mean_return = pct_changes.mean() * 252
    volatility = pct_changes.std() * math.sqrt(252.0)
    data_df = pd.DataFrame.from_dict({
        "return": mean_return,
        "volatility": volatility
    })

    # calculation may produce inf/nan so purge now... dropna() alone keeps
    # +/-inf (a zero price followed by a non-zero one), which the clustering
    # calls below cannot handle, so map inf to NaN first
    data_df = data_df.replace([np.inf, -np.inf], np.nan).dropna()
    data = np.column_stack((np.asarray(data_df["return"]),
                            np.asarray(data_df["volatility"])))

    # inertia for each candidate k, returned so a caller can inspect how it
    # changes with the number of clusters
    distortion = []
    for k in range(2, 20):
        k_means = KMeans(n_clusters=k)
        k_means.fit(data)
        distortion.append(k_means.inertia_)

    # computing K-Means with K = chosen_k
    centroids, _ = kmeans(data, chosen_k)
    # assign each sample to a cluster
    idx, _ = vq(data, centroids)
    data_df["cluster_id"] = idx
    return distortion, chosen_k, centroids, idx, data_df
Example #4
0
    def recalc_queryset(self, **kwargs):
        """Return the stocks whose short/long moving averages cross in the timeframe.

        Keyword args (all optional):
            n_days: length of the reporting timeframe in days (default 30).
            what_to_search: passed with ``self.request`` to
                filter_stocks_to_search() to choose the candidate stocks.
            period1: window of the short moving average (default 20).
            period2: window of the long moving average (default 200); must
                be greater than ``period1``.

        Side effects:
            Sets ``self.timeframe`` to the ``n_days`` reporting timeframe.

        Returns:
            A list (arbitrary order) of the stocks with at least one
            crossover date inside the reporting timeframe.
        """
        n_days = kwargs.get("n_days", 30)
        stocks_to_consider = filter_stocks_to_search(
            self.request, kwargs.get("what_to_search"))
        period1 = kwargs.get("period1", 20)
        period2 = kwargs.get("period2", 200)

        matching_stocks = set()
        self.timeframe = Timeframe(past_n_days=n_days)

        assert period2 > period1
        # fetch period2 extra days so the long average has warm-up data
        df = company_prices(stocks_to_consider,
                            Timeframe(past_n_days=n_days + period2),
                            transpose=False)
        wanted_dates = set(self.timeframe.all_dates())

        for s in stocks_to_consider:
            if s not in df.columns:
                continue
            last_price = df[s]
            # we filter now because it is after the warm-up period for MA200....
            # A boolean mask is used rather than .filter(items=<set>): the
            # latter returns rows in set-iteration order, which would hand a
            # shuffled series to the crossover detection.
            in_timeframe = last_price.index.isin(wanted_dates)
            ma20 = last_price.rolling(period1).mean()[in_timeframe]
            ma200 = last_price.rolling(
                period2,
                min_periods=min(50, 3 * period1)).mean()[in_timeframe]

            # element [1] of each crossover point is used as its date
            matching_dates = {
                xo[1]
                for xo in calc_ma_crossover_points(ma20, ma200)
            }
            if not matching_dates.isdisjoint(wanted_dates):
                matching_stocks.add(s)
        return list(matching_stocks)
Example #5
0
def setup_optimisation_matrices(stocks, timeframe: Timeframe, exclude_price,
                                warning_cb):
    """Build the price/return matrices used by the portfolio optimiser.

    ref: https://pyportfolioopt.readthedocs.io/en/latest/UserGuide.html#processing-historical-prices

    Args:
        stocks: stock codes handed to company_prices().
        timeframe: period over which "last_price" is fetched.
        exclude_price: forwarded to remove_bad_stocks().
        warning_cb: forwarded to remove_bad_stocks().

    Returns:
        ``(all_returns, stock_prices, latest_prices, first_prices)`` where
        ``stock_prices`` has one column per retained stock,
        ``latest_prices``/``first_prices`` are its last/first rows and
        ``all_returns`` is returns_from_prices(stock_prices) with NaN
        replaced by 0.0.

    Raises:
        AssertionError: if the matrices are inconsistent with each other, no
            stock remains, or there are seven or fewer rows of prices.
    """
    stock_prices = company_prices(stocks,
                                  timeframe,
                                  fields="last_price",
                                  missing_cb=None)
    # back-fill gaps of up to 10 consecutive rows. DataFrame.bfill() replaces
    # the deprecated fillna(method="bfill") spelling with identical semantics.
    stock_prices = stock_prices.bfill(axis=0, limit=10)
    latest_date = stock_prices.index[-1]
    earliest_date = stock_prices.index[0]

    # screen the stocks against both ends of the period (the criteria are
    # whatever remove_bad_stocks() applies for the given date)
    for check_date in (earliest_date, latest_date):
        stock_prices = remove_bad_stocks(stock_prices, check_date,
                                         exclude_price, warning_cb)

    latest_prices = stock_prices.loc[latest_date]
    first_prices = stock_prices.loc[earliest_date]
    all_returns = returns_from_prices(stock_prices,
                                      log_returns=False).fillna(value=0.0)

    # check that the matrices are consistent to each other
    n_rows, n_stocks = stock_prices.shape
    assert n_stocks == latest_prices.shape[0]
    assert n_stocks == all_returns.shape[1]
    assert all_returns.shape[0] == n_rows - 1
    assert len(stock_prices.columns) > 0  # must have at least 1 stock
    assert len(stock_prices) > 7  # and at least one trading week of data

    return all_returns, stock_prices, latest_prices, first_prices
Example #6
0
def get_dataset(dataset_wanted, request, timeframe=None):
    """Return the DataFrame behind a named, downloadable dataset.

    Args:
        dataset_wanted: one of "market_sentiment", "eps-per-sector",
            "kmeans-watchlist", "kmeans-etfs", "kmeans-sector-<sector id>",
            "stock-quotes-<stock>" or "financial-metrics-<stock>".
        request: current request; only ``request.user`` is read (for the
            watchlist dataset).
        timeframe: period to report on; defaults to the past 300 days. The
            "eps-per-sector" dataset always uses the past 180 days and
            "financial-metrics-" does not use it.

    Returns:
        The requested DataFrame. For "financial-metrics-" this is whatever
        financial_metrics() returns, which may be None.

    Raises:
        AssertionError: if ``dataset_wanted`` matches no known name/prefix.
        Http404: if the requested sector does not exist or has no name.
        ValueError: for a recognised prefix with an unsupported suffix (and,
            from ``int()``, for a non-numeric sector id).
    """
    sector_prefix = "kmeans-sector-"
    quotes_prefix = "stock-quotes-"
    metrics_prefix = "financial-metrics-"
    assert (dataset_wanted in ("market_sentiment", "eps-per-sector")
            or dataset_wanted.startswith(
                ("kmeans-", metrics_prefix, quotes_prefix)))

    if timeframe is None:
        timeframe = Timeframe(past_n_days=300)

    if dataset_wanted == "market_sentiment":
        return cached_all_stocks_cip(timeframe)

    if dataset_wanted == "kmeans-watchlist":
        _, _, _, _, df = make_kmeans_cluster_dataframe(
            timeframe, 7, user_watchlist(request.user))
        return df

    if dataset_wanted == "kmeans-etfs":
        _, _, _, _, df = make_kmeans_cluster_dataframe(timeframe, 7,
                                                       all_etfs())
        return df

    if dataset_wanted.startswith(quotes_prefix):
        stock = dataset_wanted[len(quotes_prefix):]
        validate_stock(stock)
        df = company_prices([stock],
                            timeframe=timeframe,
                            fields=all_stock_fundamental_fields,
                            missing_cb=None)
        df['stock_code'] = stock
        return df

    if dataset_wanted.startswith(sector_prefix):
        sector_id = int(dataset_wanted[len(sector_prefix):])
        # Manager.get() signals "no such row" by raising DoesNotExist, not by
        # returning None, so translate it into the 404 that was intended.
        try:
            sector = Sector.objects.get(sector_id=sector_id)
        except Sector.DoesNotExist as exc:
            raise Http404("No stocks associated with sector") from exc
        if sector is None or sector.sector_name is None:
            raise Http404("No stocks associated with sector")
        asx_codes = all_sector_stocks(sector.sector_name)
        _, _, _, _, df = make_kmeans_cluster_dataframe(timeframe, 7, asx_codes)
        return df

    if dataset_wanted.startswith(metrics_prefix):
        stock = dataset_wanted[len(metrics_prefix):]
        validate_stock(stock)
        df = financial_metrics(stock)
        if df is not None:
            # excel doesnt support timezones, so we remove it first by
            # replacing the date-like column labels with plain strings
            df.columns = [d.strftime("%Y-%m-%d") for d in df.columns]
        return df

    if dataset_wanted == "eps-per-sector":
        df, _ = pe_trends_df(Timeframe(past_n_days=180))
        df = make_pe_trends_eps_df(df, stocks_by_sector())
        return df.set_index("asx_code", drop=True)

    raise ValueError("Unsupported dataset {}".format(dataset_wanted))
Example #7
0
def make_portfolio_performance_dataframe(
        stocks: Iterable[str], timeframe: Timeframe,
        purchases: Iterable[VirtualPurchase]) -> pd.DataFrame:
    """Build a per-day, per-stock record of portfolio cost, worth and profit.

    Args:
        stocks: stock codes in the portfolio. Iterated once per trading day,
            so it must be re-iterable (list, tuple, ...).
        timeframe: period to report on.
        purchases: the purchases to account for; ``buy_date``, ``amount``,
            ``n`` and ``asx_code`` are read from each. May be any iterable,
            including a one-shot generator.

    Returns:
        A DataFrame with the columns "portfolio_cost", "portfolio_worth",
        "portfolio_profit", "stock_cost", "stock_worth", "stock_profit",
        "date" (datetime) and "stock". It holds one row per (date, stock)
        with a known price, and is empty (same columns) when there are none.
    """
    columns = [
        "portfolio_cost",
        "portfolio_worth",
        "portfolio_profit",
        "stock_cost",
        "stock_worth",
        "stock_profit",
        "date",
        "stock",
    ]

    def sum_portfolio(df: pd.DataFrame, date_str: str, stock_items):
        """Total worth on ``date_str`` of ``(stock, quantity)`` holdings."""
        validate_date(date_str)
        return sum(df.at[code, date_str] * n_held
                   for code, n_held in stock_items)

    # Index the purchases by buy date once, instead of re-filtering the whole
    # iterable on every day (which also breaks on one-shot iterables).
    purchases_by_date = defaultdict(list)
    for purchase in purchases:
        purchases_by_date[purchase.buy_date].append(purchase)

    df = company_prices(stocks, timeframe, transpose=True)
    rows = []
    stock_count = defaultdict(int)
    stock_cost = defaultdict(float)
    portfolio_cost = 0.0

    for ymd in timeframe.all_dates():
        d = datetime.strptime(ymd, "%Y-%m-%d").date()
        d_str = str(d)
        if d_str not in df.columns:  # not a trading day?
            # NOTE(review): purchases dated on such a day are never counted,
            # exactly as before - confirm buy dates are always trading days
            continue
        for purchase in purchases_by_date.get(d, ()):
            portfolio_cost += purchase.amount
            stock_count[purchase.asx_code] += purchase.n
            stock_cost[purchase.asx_code] += purchase.amount

        portfolio_worth = sum_portfolio(df, d_str, stock_count.items())
        # emit rows for each stock and aggregate portfolio
        for asx_code in stocks:
            cur_price = df.at[asx_code, d_str]
            if np.isnan(cur_price):  # price missing? ok, skip record
                continue
            assert cur_price is not None and cur_price >= 0.0
            stock_worth = cur_price * stock_count[asx_code]

            rows.append({
                "portfolio_cost": portfolio_cost,
                "portfolio_worth": portfolio_worth,
                "portfolio_profit": portfolio_worth - portfolio_cost,
                "stock_cost": stock_cost[asx_code],
                "stock_worth": stock_worth,
                "stock_profit": stock_worth - stock_cost[asx_code],
                "date": d_str,
                "stock": asx_code,
            })

    # Explicit columns keep the "date" lookup below valid even when no row
    # was produced (previously a KeyError).
    result = pd.DataFrame(rows, columns=columns)
    result["date"] = pd.to_datetime(result["date"], format="%Y-%m-%d")
    return result
Example #8
0
 def dataframe(ld: LazyDictionary) -> pd.DataFrame:
     """Fetch every fundamental field for the enclosing scope's ``stock``.

     ``ld`` is not used here. The period is extended by 200 days beyond the
     enclosing ``n_days`` to warm up the MA200 function.
     """
     warmup_days = 200  # to warmup MA200 function
     extended_timeframe = Timeframe(past_n_days=n_days + warmup_days)
     return company_prices(
         (stock,),
         extended_timeframe,
         fields=all_stock_fundamental_fields,
         missing_cb=None,
     )
Example #9
0
def test_company_prices(
    quotation_fixture, monkeypatch
):  # pylint: disable=unused-argument,redefined-outer-name
    """Exercise company_prices() against a monkeypatched ``mdl.make_superdf``.

    Covers the shape of the transposed frame (stocks as rows, dates as
    columns), NaN retention when ``missing_cb=None`` and the 0.0 values seen
    when the default ``missing_cb`` is left in place.
    """
    wanted_stocks = ("ABC", "OTHER")
    expected_dates = ["2021-01-0{}".format(day) for day in range(1, 7)]
    monkeypatch.setattr(mdl, "make_superdf", mock_superdf_all_stocks)
    required_timeframe = Timeframe(from_date="2021-01-01", n=6)

    # basic check
    unfilled = company_prices(
        list(wanted_stocks),
        required_timeframe,
        fields="last_price",
        missing_cb=None,
        transpose=True,
    )
    assert isinstance(unfilled, pd.DataFrame)
    assert len(unfilled) == 2
    assert list(unfilled.index) == list(wanted_stocks)
    assert list(unfilled.columns) == expected_dates
    # only the first day has a price for OTHER; the rest stay missing
    assert list(np.isnan(unfilled.loc["OTHER"])) == [
        False,
        True,
        True,
        True,
        True,
        True,
    ]

    # check impute missing functionality
    imputed = company_prices(
        list(wanted_stocks),
        required_timeframe,
        fields="last_price",
        transpose=True,
    )
    assert list(imputed.loc["OTHER"]) == [0.0 for _ in expected_dates]

    # finally check that a multi-field DataFrame is as requested
    monkeypatch.setattr(mdl, "make_superdf", mock_superdf_many_fields)
Example #10
0
def show_fundamentals(request, stock=None, n_days=2 * 365):
    """Render the fundamentals page for one stock.

    Validates the user and the stock, fetches the fundamental fields for the
    past ``n_days`` days, swaps "change_in_percent" for its running total and
    passes the plot of the result to the "stock_fundamentals.html" template.
    """
    validate_user(request.user)
    validate_stock(stock)

    wanted_fields = (
        "eps",
        "volume",
        "last_price",
        "annual_dividend_yield",
        "pe",
        "change_in_percent",
        "change_price",
        "market_cap",
        "number_of_shares",
    )
    timeframe = Timeframe(past_n_days=n_days)
    df = company_prices([stock],
                        timeframe,
                        fields=wanted_fields,
                        missing_cb=None)

    # nicer to display cumulative
    running_change = df['change_in_percent'].cumsum()
    df['change_in_percent_cumulative'] = running_change
    df = df.drop(columns='change_in_percent')

    return render(
        request,
        "stock_fundamentals.html",
        {
            "asx_code": stock,
            "is_fundamentals": True,
            "fundamentals_plot": plot_fundamentals(df, stock),
        },
    )
Example #11
0
def show_pe_trends(request):
    """
    Display a plot of per-sector PE trends across stocks in each sector
    ref: https://www.commsec.com.au/education/learn/choosing-investments/what-is-price-to-earnings-pe-ratio.html

    For the past 180 days this builds, per date and sector, the summed EPS
    and the summed/mean P/E (P/E only over stocks whose P/E values sum to
    more than zero), plus a market-wide mean P/E series, and renders them
    with the "pe_trends.html" template.
    """
    validate_user(request.user)
    timeframe = Timeframe(past_n_days=180)
    pe_df = company_prices(None,
                           timeframe,
                           fields="pe",
                           missing_cb=None,
                           transpose=True)
    eps_df = company_prices(None,
                            timeframe,
                            fields="eps",
                            missing_cb=None,
                            transpose=True)
    ss = stocks_by_sector()
    ss_dict = {row.asx_code: row.sector_name for row in ss.itertuples()}

    eps_stocks = set(eps_df.index)
    n_stocks = len(pe_df)
    # rows (stocks) whose P/E values over the period sum to more than zero
    positive_pe_df = pe_df[pe_df.sum(axis=1) > 0.0]
    positive_pe_stocks = set(positive_pe_df.index)
    all_stocks = set(pe_df.index)
    n_non_zero_sum = len(positive_pe_stocks)
    records = []
    trading_dates = set(pe_df.columns)

    sector_counts_all_stocks = ss['sector_name'].value_counts()
    all_sectors = set(ss['sector_name'].unique())
    # boolean-mask selection is used instead of .filter(items=<set>), whose
    # row order would follow set iteration order
    pe_pos_df = positive_pe_df.merge(ss, left_index=True, right_on='asx_code')
    assert len(pe_pos_df) <= len(positive_pe_stocks) and len(pe_pos_df) > 0
    # The merge adds string columns (asx_code, sector_name); numeric_only is
    # required because pandas >= 2 raises on them instead of skipping them.
    market_avg_pe_df = pe_pos_df.mean(axis=0, numeric_only=True).to_frame(
        name='market_pe')  # avg P/E by date series
    market_avg_pe_df['date'] = pd.to_datetime(market_avg_pe_df.index)

    # number of positive-P/E stocks per sector: the divisor for mean_pe
    sector_counts_pe_pos_stocks_only = dict(
        pe_pos_df['sector_name'].value_counts().items())

    for ymd in timeframe.all_dates():
        # needed to avoid KeyError raised during DataFrame.at[] calls below
        if ymd not in trading_dates:
            continue
        sum_pe_per_sector = defaultdict(float)
        sum_eps_per_sector = defaultdict(float)

        for stock in all_stocks:
            if stock not in ss_dict:
                continue
            sector = ss_dict[stock]
            assert isinstance(sector, str)

            # EPS and P/E are accumulated independently: a missing EPS must
            # not also discard the stock's P/E (the old `continue` did that)
            if stock in eps_stocks:
                eps = eps_df.at[stock, ymd]
                if not isnan(eps):
                    sum_eps_per_sector[sector] += eps

            if stock in positive_pe_stocks:
                pe = pe_df.at[stock, ymd]
                if not isnan(pe):
                    assert pe >= 0.0
                    sum_pe_per_sector[sector] += pe

        assert len(sector_counts_pe_pos_stocks_only) == len(sum_pe_per_sector)
        assert len(sector_counts_all_stocks) == len(sum_eps_per_sector)
        for sector in all_sectors:
            pe_sum = sum_pe_per_sector.get(sector, None)
            n_pe = sector_counts_pe_pos_stocks_only.get(sector, None)
            pe_mean = pe_sum / n_pe if pe_sum is not None else None
            eps_sum = sum_eps_per_sector.get(sector, None)

            records.append({
                'date': ymd,
                'sector': sector,
                'mean_pe': pe_mean,
                'sum_pe': pe_sum,
                'sum_eps': eps_sum,
                'n_stocks': n_stocks,
                'n_sector_stocks_pe_only': n_pe
            })

    df = pd.DataFrame.from_records(records)
    context = {
        "title": "PE Trends: {}".format(timeframe.description),
        "n_stocks": n_stocks,
        "timeframe": timeframe,
        "n_stocks_with_pe": n_non_zero_sum,
        "sector_pe_plot": plot_sector_field(df, field="mean_pe"),
        "sector_eps_plot": plot_sector_field(df, field="sum_eps"),
        "market_pe_plot": plot_series(market_avg_pe_df,
                                      x='date',
                                      y='market_pe',
                                      y_axis_label="Market-wide mean P/E",
                                      color=None,
                                      use_smooth_line=True)
    }
    return render(request, "pe_trends.html", context)
Example #12
0
def optimise_portfolio(
    stocks,
    timeframe: Timeframe,
    algo="ef-minvol",
    max_stocks=80,
    total_portfolio_value=100 * 1000,
    exclude_price=None,
    warning_cb=None,
    **kwargs,
):
    """Optimise a portfolio drawn from ``stocks`` and return the results.

    Args:
        stocks: candidate stock codes (at least one).
        timeframe: period of price history used to build the matrices.
        algo: strategy name handed to assign_strategy().
        max_stocks: upper bound passed to select_suitable_stocks() (>= 5).
        total_portfolio_value: stored in the result for the strategy (> 0).
        exclude_price: forwarded to setup_optimisation_matrices().
        warning_cb: optional callable given a message when an attempt fails.
        **kwargs: only ``returns_by`` is read (default "by_prices").

    Returns:
        The LazyDictionary populated by the first successful attempt, or an
        empty LazyDictionary if the loop finishes without one.

    Raises:
        AssertionError: if an argument fails the precondition checks.
        ValueError: re-raised from a failed attempt, after ``warning_cb``
            (if given) has been called.
    """
    assert len(stocks) >= 1
    assert timeframe is not None
    assert total_portfolio_value > 0
    assert max_stocks >= 5

    (
        all_returns,
        stock_prices,
        latest_prices,
        first_prices,
    ) = setup_optimisation_matrices(stocks, timeframe, exclude_price,
                                    warning_cb)

    # "A200" prices over the past 180 days, stored as "market_prices"
    market_prices = company_prices(("A200", ),
                                   Timeframe(past_n_days=180),
                                   missing_cb=None)
    market_prices.index = pd.to_datetime(market_prices.index,
                                         format="%Y-%m-%d")
    market_prices = pd.Series(market_prices["A200"])
    quotes, _ = valid_quotes_only("latest", ensure_date_has_data=True)

    # Read the caller's option once. The loop previously rebound ``kwargs``
    # to the strategy's keyword arguments, so any later pass would have
    # looked "returns_by" up in the wrong dictionary.
    returns_by = kwargs.get("returns_by", "by_prices")

    # selection thresholds handed to select_suitable_stocks(), in the order
    # in which they are tried
    thresholds = ((10, 0.0001), (20, 0.0005), (30, 0.001), (40, 0.005),
                  (50, 0.01))
    for min_unique, var_min in thresholds:
        filtered_stocks, n_stocks = select_suitable_stocks(
            all_returns, stock_prices, max_stocks, min_unique, var_min)
        # since the sample of stocks might be different, we must recompute each iteration...
        filtered_stocks = filtered_stocks.sample(n=n_stocks, axis=1)
        market_caps = {
            q.asx_code: q.market_cap
            for q in quotes if q.asx_code in filtered_stocks.columns
        }

        # must start a new dict since each key is immutable after use
        ld = LazyDictionary()
        ld["n_stocks"] = n_stocks
        ld["filtered_stocks"] = filtered_stocks
        ld["market_prices"] = market_prices
        ld["market_caps"] = market_caps
        ld["total_portfolio_value"] = total_portfolio_value
        ld["returns_by"] = returns_by

        strategy, strategy_kwargs = assign_strategy(ld, algo)
        try:
            run_iteration(
                ld,
                strategy,
                first_prices,
                latest_prices,
                filtered_stocks,
                **strategy_kwargs,
            )

            # NB: we dont bother caching these plots since we must calculate so many other values but we need to serve them via cache_plot() anyway
            ld["efficient_frontier_plot"] = cache_plot(
                secrets.token_urlsafe(32), plot_random_portfolios, datasets=ld)
            ld["correlation_plot"] = lambda ld: cache_plot(
                secrets.token_urlsafe(32),
                lambda ld: plot_covariance(ld["m"], plot_correlation=True).
                figure,
                datasets=ld,
            )
            return ld
        except ValueError as ve:
            if warning_cb:
                warning_cb(
                    "Unable to optimise stocks with min_unique={} and var_min={}: n_stocks={} - {}"
                    .format(min_unique, var_min, n_stocks, str(ve)))
            # NOTE(review): the original comment here said "try next
            # iteration", yet the error is re-raised, so the looser
            # thresholds and the fallback below are never reached. Kept as-is
            # because callers may rely on the ValueError - confirm intent.
            raise

    print("*** WARNING: unable to optimise portolio!")
    return LazyDictionary()
Example #13
0
def show_purchase_performance(request):
    """Render the performance over time of the user's recorded purchases.

    The period runs from the earliest purchase date to the last entry of
    all_available_dates(). For every trading day in it, each stock with a
    known price gets a record of its cost/worth/profit alongside the totals
    for the whole portfolio; the records are plotted and passed to the
    "portfolio_trends.html" template.

    Raises:
        Http404: if the user has no purchases (previously an IndexError).
    """
    purchases = []
    stocks = []
    for stock, purchases_for_stock in user_purchases(request.user).items():
        stocks.append(stock)
        purchases.extend(purchases_for_stock)

    if not purchases:
        # imported locally so this block does not depend on the file header
        from django.http import Http404

        raise Http404("No purchases recorded for this user")

    # index purchases by buy date once rather than re-scanning them each day
    purchases_by_date = defaultdict(list)
    for purchase in purchases:
        purchases_by_date[purchase.buy_date].append(purchase)
    earliest_buy_date = min(purchases_by_date)

    timeframe = Timeframe(from_date=str(earliest_buy_date),
                          to_date=all_available_dates()[-1])
    df = company_prices(stocks, timeframe, transpose=True)
    rows = []
    stock_count = defaultdict(int)
    stock_cost = defaultdict(float)
    portfolio_cost = 0.0

    for ymd in timeframe.all_dates():
        d = datetime.strptime(ymd, "%Y-%m-%d").date()
        d_str = str(d)
        if d_str not in df.columns:  # not a trading day?
            continue
        for purchase in purchases_by_date.get(d, ()):
            portfolio_cost += purchase.amount
            stock_count[purchase.asx_code] += purchase.n
            stock_cost[purchase.asx_code] += purchase.amount

        portfolio_worth = sum_portfolio(df, d_str, stock_count.items())
        # emit rows for each stock and aggregate portfolio
        for asx_code in stocks:
            cur_price = df.at[asx_code, d_str]
            if np.isnan(cur_price):  # price missing? ok, skip record
                continue
            assert cur_price is not None and cur_price >= 0.0
            stock_worth = cur_price * stock_count[asx_code]

            rows.append({
                "portfolio_cost": portfolio_cost,
                "portfolio_worth": portfolio_worth,
                "portfolio_profit": portfolio_worth - portfolio_cost,
                "stock_cost": stock_cost[asx_code],
                "stock_worth": stock_worth,
                "stock_profit": stock_worth - stock_cost[asx_code],
                "date": d_str,
                "stock": asx_code,
            })

    (
        portfolio_performance_figure,
        stock_performance_figure,
        profit_contributors_figure,
    ) = plot_portfolio(pd.DataFrame.from_records(rows))
    context = {
        "title": "Portfolio performance",
        "portfolio_title": "Overall",
        "portfolio_figure": portfolio_performance_figure,
        "stock_title": "Stock",
        "stock_figure": stock_performance_figure,
        "profit_contributors": profit_contributors_figure,
    }
    return render(request, "portfolio_trends.html", context=context)