def test_day_low_high(quotation_fixture):
    """Check the columns and the high-minus-low spread returned by day_low_high() for stock ABC."""
    # NB: dates and stock must correspond to quotation_fixture
    trading_days = ('2021-01-01', '2021-01-02', '2021-01-03')
    wanted_columns = {'day_low_price', 'day_high_price', 'last_price', 'volume', 'date'}

    df = day_low_high('ABC', all_dates=list(trading_days))
    assert set(df.columns) == wanted_columns

    # The test expects a spread of 0.3 on each of the three dates.
    spread = pd.Series(df['day_high_price'] - df['day_low_price'])
    expected_spread = pd.Series(dict.fromkeys(trading_days, 0.3))
    pd.testing.assert_series_equal(spread, expected_spread, check_names=False)
def analyse_point_scores(stock: str, sector_companies, all_stocks_cip: pd.DataFrame, rules=None):
    """
    Plot the running point score of a stock, as described on the stock view page.

    Each column label of all_stocks_cip is treated as one trading day. For every
    day each rule is called with a shared state dict and the values returned are
    added to a running total (so downturns can subtract points). The total after
    each day is what gets plotted.

    stock            -- code of the stock to score (must be at least 3 characters)
    sector_companies -- codes used to compute the sector average for a day
    all_stocks_cip   -- dataframe whose rows are all used for the market average
                        of a day; it must contain a row for stock
    rules            -- callables taking the state dict; when None the result of
                        default_point_score_rules() is used

    Returns whatever plot_series() returns (documented by the original author as
    a utf-8 base64 encoded plot image).
    """
    assert len(stock) >= 3
    assert all_stocks_cip is not None
    if rules is None:
        rules = default_point_score_rules()

    trading_days = all_stocks_cip.columns
    # The low/high data is fetched once; only the per-day entries are refreshed below.
    state = {
        'day_low_high_df': day_low_high(stock, all_dates=trading_days),
        'all_stocks_change_in_percent_df': all_stocks_cip,
        'stock': stock,
        'daily_range_threshold': 0.20,  # 20% at either end of the daily range gets a point
    }

    running_total = 0
    records = []
    for day in trading_days:
        day_market_avg = all_stocks_cip[day].mean()
        day_sector_avg = all_stocks_cip[day].filter(items=sector_companies).mean()
        day_stock_move = all_stocks_cip.at[stock, day]
        state.update(
            market_avg=day_market_avg,
            sector_avg=day_sector_avg,
            stock_move=day_stock_move,
            date=day,
        )
        running_total += sum(rule(state) for rule in rules)
        records.append({'points': running_total, 'stock': stock, 'date': day})

    score_df = pd.DataFrame.from_records(records)
    score_df['date'] = pd.to_datetime(score_df['date'])
    return plot_series(score_df, x='date', y='points')
def plot_point_scores(stock: str, sector_companies, all_stocks_cip: pd.DataFrame, rules):
    """
    Visualise the stock in terms of point scores as described on the stock view page.

    Each column label of all_stocks_cip is treated as one trading day. For every
    day each rule is called exactly once with a shared state dict; the values
    returned are added both to a running total and to a per-rule net tally
    (so points are lost for equivalent downturns).

    stock            -- code of the stock to score (must be at least 3 characters)
    sector_companies -- codes used to compute the sector average for a day
    all_stocks_cip   -- dataframe whose rows are all used for the market average
                        of a day; it must contain a row for stock
    rules            -- non-empty sequence of callables taking the state dict; each
                        must have a __name__ (a leading "rule_" is stripped for display)

    Returns a 2-tuple:
      * the result of plot_series() for the running total by date
      * the result of plot_as_inline_html_data() for a horizontal bar chart of
        the net points contributed by each rule
    """
    assert len(stock) >= 3
    assert all_stocks_cip is not None
    assert rules is not None and len(rules) > 0

    # Display labels depend only on the rule, so derive them once rather than per date.
    rule_labels = []
    for r in rules:
        label = r.__name__
        if label.startswith("rule_"):
            label = label[5:]
        rule_labels.append(label)

    rows = []
    points = 0
    day_low_high_df = day_low_high(stock, all_dates=all_stocks_cip.columns)
    state = {
        "day_low_high_df": day_low_high_df,  # never changes each day, so we init it here
        "all_stocks_change_in_percent_df": all_stocks_cip,
        "stock": stock,
        "daily_range_threshold": 0.20,  # 20% at either end of the daily range gets a point
    }
    net_points_by_rule = defaultdict(int)
    for date in all_stocks_cip.columns:
        market_avg = all_stocks_cip[date].mean()
        sector_avg = all_stocks_cip[date].filter(items=sector_companies).mean()
        stock_move = all_stocks_cip.at[stock, date]
        state.update(
            {
                "market_avg": market_avg,
                "sector_avg": sector_avg,
                "stock_move": stock_move,
                "date": date,
            }
        )
        # Evaluate each rule once and reuse the result for both the running total
        # and the per-rule tally. Calling the rules twice doubled the work and
        # could make the two figures disagree if a rule is not pure.
        rule_points = [r(state) for r in rules]
        points += sum(rule_points)
        for label, value in zip(rule_labels, rule_points):
            net_points_by_rule[label] += value
        rows.append({"points": points, "stock": stock, "date": date})

    df = pd.DataFrame.from_records(rows)
    df["date"] = pd.to_datetime(df["date"])
    point_score_plot = plot_series(df, x="date", y="points")

    contribution_rows = [
        {"rule": str(k), "net_points": v} for k, v in net_points_by_rule.items()
    ]
    df = pd.DataFrame.from_records(contribution_rows)
    net_rule_contributors_plot = (
        p9.ggplot(df, p9.aes(x="rule", y="net_points"))
        + p9.labs(x="Rule", y="Contribution to points by rule")
        + p9.geom_bar(stat="identity")
        + p9.theme(axis_text_y=p9.element_text(size=7), subplots_adjust={"left": 0.2})
        + p9.coord_flip()
    )
    return point_score_plot, plot_as_inline_html_data(net_rule_contributors_plot)
def test_day_low_high(quotation_fixture):  # pylint: disable=unused-argument,redefined-outer-name
    """Check the columns and the high-minus-low spread returned by day_low_high() for stock ABC."""
    # NB: dates and stock must correspond to quotation_fixture
    dates = ("2021-01-01", "2021-01-02", "2021-01-03")

    frame = day_low_high("ABC", all_dates=list(dates))
    assert set(frame.columns) == {
        "day_low_price",
        "day_high_price",
        "last_price",
        "volume",
        "date",
    }

    # The test expects a spread of 0.3 on each of the three dates.
    observed = pd.Series(frame["day_high_price"] - frame["day_low_price"])
    expected = pd.Series({day: 0.3 for day in dates})
    pd.testing.assert_series_equal(observed, expected, check_names=False)
def detect_outliers(stocks: list, all_stocks_cip: pd.DataFrame, rules=None):
    """
    Return a list of the stocks (from the specified list) which are flagged as
    outliers, based on the net points each rule awards them.

    For every stock the points returned by each rule are summed over all the
    dates (columns) of all_stocks_cip, giving one row per stock and one column
    per rule. An IForest model is fitted to that table and the stocks for which
    predict() returns a value greater than zero are returned.

    stocks         -- codes to examine; a stock for which a KeyError is raised
                      while looking up its sector or its daily low/high data is
                      reported via warning() and skipped
    all_stocks_cip -- the "change in percent" dataframe; all rows are used for the
                      market average and it must contain a row for each stock kept
    rules          -- callables taking the state dict; when None the result of
                      default_point_score_rules() is used

    An empty list is returned when no stock could be processed.
    """
    if rules is None:
        rules = default_point_score_rules()
    str_rules = {str(r): r for r in rules}
    stocks_by_sector_df = stocks_by_sector()  # NB: ETFs in watchlist will have no sector
    stocks_by_sector_df.index = stocks_by_sector_df['asx_code']

    # The market average depends only on the date, so compute it once
    # instead of once per stock.
    market_avg_by_date = {
        date: all_stocks_cip[date].mean() for date in all_stocks_cip.columns
    }

    rows = []
    for stock in stocks:
        try:
            sector = stocks_by_sector_df.at[stock, 'sector_name']
            sector_companies = list(
                stocks_by_sector_df.loc[stocks_by_sector_df['sector_name'] == sector].asx_code
            )
            # day_low_high() may raise KeyError when data is currently being fetched, so it appears here...
            day_low_high_df = day_low_high(stock, all_stocks_cip.columns)
        except KeyError:
            warning(None, "Unable to locate watchlist entry: {} - continuing without it".format(stock))
            continue

        state = {
            'day_low_high_df': day_low_high_df,  # never changes each day, so we init it here
            'all_stocks_change_in_percent_df': all_stocks_cip,
            'stock': stock,
            'daily_range_threshold': 0.20,  # 20% at either end of the daily range gets a point
        }
        points_by_rule = defaultdict(int)
        for date in all_stocks_cip.columns:
            state.update({
                'market_avg': market_avg_by_date[date],
                'sector_avg': all_stocks_cip[date].filter(items=sector_companies).mean(),
                'stock_move': all_stocks_cip.at[stock, date],
                'date': date,
            })
            for rule_name, rule in str_rules.items():
                try:
                    points_by_rule[rule_name] += rule(state)
                except TypeError:
                    # Best effort: a rule that cannot score this day (e.g. missing
                    # values in the dataset) contributes nothing, rather than
                    # aborting the scan of every stock.
                    pass
        d = {'stock': stock}
        d.update(points_by_rule)
        rows.append(d)

    results = []
    if rows:  # with no rows there is no 'stock' column to index on and nothing to fit
        df = pd.DataFrame.from_records(rows).set_index('stock')
        from pyod.models.iforest import IForest
        clf = IForest()
        clf.fit(df)
        scores = clf.predict(df)
        results = [code for code, value in zip(df.index, scores) if value > 0]
    print("Found {} outlier stocks".format(len(results)))
    return results
def detect_outliers(stocks: list, all_stocks_cip: pd.DataFrame, rules=None):
    """
    Return a list of the stocks (from the specified list) which are flagged as
    outliers, based on the net points each rule awards them.

    For every stock the points returned by each rule are summed over all the
    dates (columns) of all_stocks_cip, giving one row per stock and one column
    per rule. An IForest model is fitted to that table and the stocks for which
    predict() returns a value greater than zero are returned.

    stocks         -- codes to examine; a stock for which a KeyError is raised
                      while looking up its sector or its daily low/high data is
                      reported via warning() and skipped
    all_stocks_cip -- the "change in percent" for at least the stocks present in
                      the specified list; all rows are used for the market average
    rules          -- callables taking the state dict; when None the result of
                      default_point_score_rules() is used

    An empty list is returned when no stock could be processed.
    """
    if rules is None:
        rules = default_point_score_rules()
    str_rules = {str(r): r for r in rules}
    stocks_by_sector_df = stocks_by_sector()  # NB: ETFs in watchlist will have no sector
    stocks_by_sector_df.index = stocks_by_sector_df["asx_code"]

    # The market average depends only on the date, so compute it once
    # instead of once per stock.
    market_avg_by_date = {
        date: all_stocks_cip[date].mean() for date in all_stocks_cip.columns
    }

    rows = []
    for stock in stocks:
        try:
            sector = stocks_by_sector_df.at[stock, "sector_name"]
            sector_companies = list(
                stocks_by_sector_df.loc[
                    stocks_by_sector_df["sector_name"] == sector
                ].asx_code
            )
            # day_low_high() may raise KeyError when data is currently being fetched, so it appears here...
            day_low_high_df = day_low_high(stock, all_stocks_cip.columns)
        except KeyError:
            warning(
                None,
                "Unable to locate watchlist entry: {} - continuing without it".format(stock),
            )
            continue

        state = {
            "day_low_high_df": day_low_high_df,  # never changes each day, so we init it here
            "all_stocks_change_in_percent_df": all_stocks_cip,
            "stock": stock,
            "daily_range_threshold": 0.20,  # 20% at either end of the daily range gets a point
        }
        points_by_rule = defaultdict(int)
        for date in all_stocks_cip.columns:
            state.update({
                "market_avg": market_avg_by_date[date],
                "sector_avg": all_stocks_cip[date].filter(items=sector_companies).mean(),
                "stock_move": all_stocks_cip.at[stock, date],
                "date": date,
            })
            for rule_name, rule in str_rules.items():
                try:
                    points_by_rule[rule_name] += rule(state)
                except TypeError:  # handle nan's in dataset safely
                    pass
        d = {"stock": stock}
        d.update(points_by_rule)
        rows.append(d)

    results = []
    if rows:  # with no rows there is no "stock" column to index on and nothing to fit
        df = pd.DataFrame.from_records(rows).set_index("stock")
        clf = IForest()
        clf.fit(df)
        scores = clf.predict(df)
        results = [code for code, value in zip(df.index, scores) if value > 0]
    print("Found {} outlier stocks".format(len(results)))
    return results