def moving_zscore(df, periods, rolling=True, new_names=None,
                  group_index=TICKER):
    """
    Compute the Moving Z-Score of the data, separately for each stock.

    For every data-point the Z-Score is the data-point minus the mean of its
    window, divided by the standard deviation of that same window.

    :param df:
        Pandas DataFrame e.g. with P/Sales ratios but it could hold any data.
        It may contain data for one or more stocks.

    :param periods:
        Integer number of time-steps used for the Z-Score.
        If `rolling==True` this is the length of the moving window.
        If `rolling==False` this is the minimum number of data-points
        required before a Z-Score is calculated.

    :param rolling:
        Boolean. True uses a rolling window of fixed length. False uses an
        expanding window with all the preceding data-points.

    :param new_names:
        Dict or function for mapping / converting the column-names.
        If `df` is a Pandas Series, then this is assumed to be a string.
        If `None` the names are left unchanged.

    :param group_index:
        If the DataFrame has a MultiIndex then group data using this
        index-column. By default this is TICKER but it could also be e.g.
        SIMFIN_ID if you are using that as an index in your DataFrame.

    :return:
        Pandas DataFrame with the Moving Z-Score.
    """

    def _zscore(df_grp):
        # Z-Score for the data of a single stock.
        if rolling:
            # Fixed-length moving window.
            window = df_grp.rolling(window=periods)
        else:
            # Window growing from the first data-point.
            window = df_grp.expanding(min_periods=periods)

        deviation = df_grp - window.mean()
        return deviation / window.std()

    # The helper `apply` handles grouping when `df` holds multiple stocks.
    df_result = apply(df=df, func=_zscore, group_index=group_index)

    if new_names is None:
        return df_result

    # Rename in-place so the same result-object is returned.
    rename_columns(df=df_result, new_names=new_names, inplace=True)
    return df_result
def asfreq(df, freq, method=None, group_index=TICKER, **kwargs):
    """
    Resample a Pandas DataFrame or Series to a new frequency by calling the
    Pandas `asfreq` function on the data of each stock.

    The data may have either a DatetimeIndex (single company) or a MultiIndex
    (multiple companies in a single DataFrame).

    Only forward- and backward-fill of new data-points is offered here. For
    other filling methods use the :obj:`~simfin.resample.resample` function.

    :param df:
        Pandas DataFrame or Series assumed to have either a DatetimeIndex
        or a MultiIndex with 2 indices, one of which is a DatetimeIndex
        and the other is given by the arg `group_index`.

    :param freq:
        Resampling frequency e.g. 'D' for daily.

        This is passed directly to the Pandas function which has more options:
        https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects

    :param method:
        String for the method of filling in empty values. Valid options:

        - `None`, do not fill in the empty values.
        - 'ffill' is forward-fill with last known values.
        - 'bfill' is backward-fill using future values.

        This is passed directly to the Pandas function which has more options:
        https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.asfreq.html

    :param group_index:
        If `df` has a MultiIndex then group data using this index-column.
        By default this is TICKER but it could also be e.g. SIMFIN_ID if
        you are using that as an index in your DataFrame.

    :param **kwargs:
        Optional keyword-arguments passed directly to Pandas `asfreq` function.
        Valid arguments:
        https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.asfreq.html

    :return:
        Resampled DataFrame or Series.
    """

    def _resample_single(df_single):
        # Resample the data for a single stock.
        return df_single.asfreq(freq=freq, method=method, **kwargs)

    # The helper `apply` handles grouping when `df` holds multiple stocks.
    return apply(df=df, func=_resample_single, group_index=group_index)
def max_drawdown(df, window=None, group_index=TICKER):
    """
    Compute the Max Drawdown of the data, separately for each stock.

    Each data-point is divided by the highest value observed either since
    the beginning or within a rolling window, and then 1.0 is subtracted.
    The result is therefore 0.0 when the data-point is itself the highest
    value and negative when it is below the highest value.

    :param df:
        Pandas DataFrame typically with share-prices but it could hold any
        data. It may contain data for one or more stocks.

    :param window:
        If `None` then the highest value is taken from the beginning.
        If an integer then the highest value is taken from a rolling window
        of that length.

    :param group_index:
        If the DataFrame has a MultiIndex then group data using this
        index-column. By default this is TICKER but it could also be e.g.
        SIMFIN_ID if you are using that as an index in your DataFrame.

    :return:
        Pandas DataFrame with the Max Drawdown.
    """

    def _drawdown(df_grp):
        # Drawdown for the data of a single stock.
        if window is None:
            # Highest value seen so far.
            peak = df_grp.cummax()
        else:
            # Highest value inside the rolling window.
            peak = df_grp.rolling(window=window).max()

        return df_grp / peak - 1.0

    # The helper `apply` handles grouping when `df` holds multiple stocks.
    return apply(df=df, func=_drawdown, group_index=group_index)
def trade_signals(df, signal1, signal2, group_index=TICKER):
    """
    Derive boolean Buy / Sell / Hold signals by comparing two columns of the
    given DataFrame, separately for each stock.

    - HOLD is True whenever `df[signal1] >= df[signal2]`.
    - BUY is True when `df[signal1]` crosses above `df[signal2]`, i.e. HOLD
      is True now but was False in the previous row.
    - SELL is True when `df[signal1]` crosses below `df[signal2]`, i.e. HOLD
      is False now but was True in the previous row.

    The first row of each stock has no previous row, so it never produces a
    BUY or a SELL signal.

    .. note:: The original documentation describes this function as being
        used through the :obj:`~simfin.cache.cache` wrapper, which is not
        visible in this block (TODO confirm). When wrapped, passing the arg
        `cache_refresh` makes it save the result in a cache-file, which is
        loaded on later calls if it is more recent than specified by
        `cache_refresh`, otherwise the result is computed and saved again.
        See the documentation for that wrapper for details on its arguments.

    .. warning:: When the cache-wrapper is in effect you **MUST** use
        keyword arguments to this function, otherwise the first unnamed
        arguments would get passed to the :obj:`~simfin.cache.cache`
        wrapper instead.

    :param df:
        Pandas DataFrame with columns `signal1` and `signal2`.
        May contain data for one or more stocks.

    :param signal1:
        String with the name of a column in `df`.

    :param signal2:
        String with the name of a column in `df`.

    :param group_index:
        If the DataFrame has a MultiIndex then group data using this
        index-column. By default this is TICKER but it could also be e.g.
        SIMFIN_ID if you are using that as an index in your DataFrame.

    :return:
        Pandas DataFrame with the columns BUY, SELL, HOLD sorted by name.
    """

    def _signals(df_grp):
        # Signals for the data of a single stock.

        # True where signal1 is at or above signal2.
        is_above = (df_grp[signal1] >= df_grp[signal2])

        # State of the previous row. The missing first value is filled so
        # that the first row cannot trigger a signal: as True for the
        # buy-test and as False for the sell-test.
        prev_for_buy = is_above.shift(1, fill_value=True)
        prev_for_sell = is_above.shift(1, fill_value=False)

        # Creating the DataFrame with its index up-front improves performance.
        df_out = pd.DataFrame(index=df_grp.index)
        df_out[BUY] = is_above & ~prev_for_buy
        df_out[SELL] = ~is_above & prev_for_sell
        df_out[HOLD] = is_above

        return df_out

    # The helper `apply` handles grouping when `df` holds multiple stocks.
    df_signals = apply(df=df, func=_signals, group_index=group_index)

    # Order the columns alphabetically by name.
    df_signals.sort_index(axis='columns', inplace=True)

    return df_signals
def val_signals(df_prices, df_income_ttm, df_balance_ttm, df_cashflow_ttm,
                fill_method='ffill', offset=None, func=None,
                date_index=REPORT_DATE, shares_index=SHARES_DILUTED,
                group_index=TICKER):
    """
    Compute valuation signals such as P/E and P/Sales ratios, for all the
    stocks in the given DataFrames.

    The financial data is converted to per-share numbers, reindexed to the
    same data-points as `df_prices`, and then combined with the closing
    share-prices to form the signals.

    .. note:: The original documentation describes this function as being
        used through the :obj:`~simfin.cache.cache` wrapper, which is not
        visible in this block (TODO confirm). When wrapped, passing the arg
        `cache_refresh` makes it save the result in a cache-file, which is
        loaded on later calls if it is more recent than specified by
        `cache_refresh`, otherwise the result is computed and saved again.
        See the documentation for that wrapper for details on its arguments.

    .. warning:: When the cache-wrapper is in effect you **MUST** use
        keyword arguments to this function, otherwise the first unnamed
        arguments would get passed to the :obj:`~simfin.cache.cache`
        wrapper instead.

    :param df_prices:
        Pandas DataFrame with share-prices for one or more stocks.
        The column CLOSE is used.

    :param df_income_ttm:
        Pandas DataFrame with Income Statement TTM data for one or more
        stocks.

    :param df_balance_ttm:
        Pandas DataFrame with Balance Sheet TTM data for one or more stocks.

    :param df_cashflow_ttm:
        Pandas DataFrame with Cash-Flow Statement TTM data for one or more
        stocks.

    :param fill_method:
        String or callable for the method of filling in empty values when
        reindexing financial data to daily data-points.
        See :obj:`~simfin.resample.reindex` for valid options.

    :param offset:
        Pandas DateOffset added to the date-index of the Pandas DataFrames
        with the financial data. Example: `pd.DateOffset(days=60)`
        This is useful if you want to add a lag of e.g. 60 days to the dates
        of financial reports with Income Statements, Balance Sheets, and
        Cash-Flow Statements, because the REPORT_DATE is not when it was
        actually made available to the public, which can be 1, 2 or even
        3 months after the REPORT_DATE.
        See :obj:`~simfin.utils.add_date_offset` for more details.

    :param func:
        Function to apply on a per-stock basis on the financial data, before
        calculating the valuation signals. This is useful e.g. to calculate
        multi-year averages of the Net Income and Revenue and use those when
        calculating P/E and P/Sales ratios.
        For example, to calculate the 2-year averages of TTM data:
        `func = lambda df: 0.5 * (df + df.shift(4))`

    :param date_index:
        Name of the date-column for the financial data e.g. REPORT_DATE.

    :param shares_index:
        String with the column-name for the share-counts. SHARES_DILUTED
        takes the potential diluting impact of stock-options into account,
        so it results in more conservative valuation ratios than
        SHARES_BASIC.

    :param group_index:
        If the DataFrames have a MultiIndex then group data using this
        index-column. By default this is TICKER but it could also be e.g.
        SIMFIN_ID if you are using that as an index in your DataFrame.

    :return:
        Pandas DataFrame with valuation signals, columns sorted by name.
    """

    def _to_daily(df_src):
        # Reindex financial data to the data-points of the share-prices.
        return reindex(df_src=df_src, df_target=df_prices,
                       method=fill_method, group_index=group_index)

    # Columns needed from each kind of financial report.
    income_cols = [REVENUE, NET_INCOME_COMMON, SHARES_BASIC, SHARES_DILUTED]
    balance_cols = [TOTAL_CUR_ASSETS, CASH_EQUIV_ST_INVEST, ACC_NOTES_RECV,
                    INVENTORIES, TOTAL_LIABILITIES, TOTAL_EQUITY]
    cashflow_cols = [DIVIDENDS_PAID]

    # Gather the data side-by-side. The concatenation makes a new copy,
    # so it is safe to add columns to it.
    df_fin = pd.concat([df_income_ttm[income_cols],
                        df_balance_ttm[balance_cols],
                        df_cashflow_ttm[cashflow_cols]], axis=1)

    # Derived financial data is added here to the TTM data, which only has
    # 4 data-points per year, so this is faster than calculating it for the
    # daily data-points further below.
    df_fin[FCF] = free_cash_flow(df_cashflow_ttm)
    df_fin[NCAV] = ncav(df_balance_ttm)
    df_fin[NETNET] = netnet(df_balance_ttm)

    # Optionally lag the index-dates of the financial data.
    if offset is not None:
        df_fin = add_date_offset(df=df_fin, offset=offset,
                                 date_index=date_index)

    # The share-counts must be extracted BEFORE the user-supplied function
    # is applied, because that function might change the share-counts.
    # This tries the requested share-counts (e.g. SHARES_DILUTED) and fills
    # missing values with the other share-counts (e.g. SHARES_BASIC).
    df_shares = shares(df=df_fin, index=shares_index)
    df_shares_daily = _to_daily(df_shares)

    # Optional user-processing of the financial data e.g. to calculate
    # multi-year averages of Earnings, Sales, etc.
    if func is not None:
        df_fin = apply(df=df_fin, func=func, group_index=group_index)

    # Per-share numbers, using the share-counts from before `func`,
    # reindexed to daily data-points.
    df_daily = _to_daily(df_fin.div(df_shares, axis=0))

    # All signals are based on the closing share-price.
    close = df_prices[CLOSE]

    # Creating the DataFrame with its index up-front improves performance.
    df_signals = pd.DataFrame(index=df_prices.index)

    # Signals of the form: Price / Per-Share financial number.
    # NCAV is the Net Current Asset Value, so P_NCAV measures the share-price
    # relative to estimated liquidation value. NETNET is the Net-Net Working
    # Capital (NNWC), a more conservative estimate of liquidation value that
    # values Receivables and Inventories at a discount to their book-value.
    price_ratios = (
        (PSALES, REVENUE),
        (PE, NET_INCOME_COMMON),
        (PFCF, FCF),
        (PBOOK, TOTAL_EQUITY),
        (P_NCAV, NCAV),
        (P_NETNET, NETNET),
    )
    for signal_name, column in price_ratios:
        df_signals[signal_name] = close / df_daily[column]

    # Earnings Yield is the inverse of the P/E ratio.
    df_signals[EARNINGS_YIELD] = df_daily[NET_INCOME_COMMON] / close

    # FCF Yield is the inverse of the P/FCF ratio.
    df_signals[FCF_YIELD] = df_daily[FCF] / close

    # Dividend Yield from TTM Cash-Flow data. This is easier than using
    # df_prices[DIVIDEND] because the actual payment dates may differ
    # slightly from one year to the next, making a TTM sum difficult.
    # DIVIDENDS_PAID is negative, hence the negation.
    df_signals[DIV_YIELD] = -df_daily[DIVIDENDS_PAID] / close

    # Market Capitalization.
    df_signals[MARKET_CAP] = df_shares_daily * close

    # Order the columns alphabetically by name.
    df_signals.sort_index(axis='columns', inplace=True)

    return df_signals
def growth_signals(df_income_ttm, df_income_qrt,
                   df_balance_ttm, df_balance_qrt,
                   df_cashflow_ttm, df_cashflow_qrt,
                   df_prices=None, fill_method='ffill',
                   offset=None, func=None,
                   date_index=REPORT_DATE, group_index=TICKER):
    """
    Compute growth-signals such as Sales Growth, Earnings Growth, etc. for
    all the stocks in the given DataFrames.

    Growth is calculated for Revenue, Net Income, Free Cash Flow and Total
    Assets. Three growth-signals are given for each of these, e.g.:

    - SALES_GROWTH is calculated from the TTM Revenue divided by the
      TTM Revenue from one year ago.

    - SALES_GROWTH_YOY is calculated from the Quarterly Revenue divided by
      the Quarterly Revenue from one year ago.

    - SALES_GROWTH_QOQ is calculated from the Quarterly Revenue divided by
      the Quarterly Revenue from the previous quarter.

    .. note:: The original documentation describes this function as being
        used through the :obj:`~simfin.cache.cache` wrapper, which is not
        visible in this block (TODO confirm). When wrapped, passing the arg
        `cache_refresh` makes it save the result in a cache-file, which is
        loaded on later calls if it is more recent than specified by
        `cache_refresh`, otherwise the result is computed and saved again.
        See the documentation for that wrapper for details on its arguments.

    .. warning:: When the cache-wrapper is in effect you **MUST** use
        keyword arguments to this function, otherwise the first unnamed
        arguments would get passed to the :obj:`~simfin.cache.cache`
        wrapper instead.

    :param df_income_ttm:
        Pandas DataFrame with Income Statement TTM data for one or more
        stocks.

    :param df_income_qrt:
        Pandas DataFrame with Income Statement Quarterly data for one or more
        stocks.

    :param df_balance_ttm:
        Pandas DataFrame with Balance Sheet TTM data for one or more stocks.

    :param df_balance_qrt:
        Pandas DataFrame with Balance Sheet Quarterly data for one or more
        stocks.

    :param df_cashflow_ttm:
        Pandas DataFrame with Cash-Flow Statement TTM data for one or more
        stocks.

    :param df_cashflow_qrt:
        Pandas DataFrame with Cash-Flow Statement Quarterly data for one or
        more stocks.

    :param df_prices:
        Optional Pandas DataFrame with share-prices for one or more stocks.
        If not `None`, then the signals will be reindexed to the same daily
        data-points as `df_prices`, otherwise the signals will be quarterly.

    :param fill_method:
        String or callable for the method of filling in empty values when
        reindexing financial data to daily data-points.
        See :obj:`~simfin.resample.reindex` for valid options.

    :param offset:
        Pandas DateOffset added to the date-index of the signals.
        Example: `pd.DateOffset(days=60)`
        This is useful if you want to add a lag of e.g. 60 days to the dates
        of financial reports with Income Statements, Balance Sheets, and
        Cash-Flow Statements, because the REPORT_DATE is not when it was
        actually made available to the public, which can be 1, 2 or even
        3 months after the REPORT_DATE.
        See :obj:`~simfin.utils.add_date_offset` for more details.

    :param func:
        Function to apply on a per-stock basis after the signals have been
        calculated, but before they have been reindexed to daily data-points.
        This is useful e.g. to calculate multi-year averages.
        For example, to calculate the 2-year averages of TTM data:
        `func = lambda df: 0.5 * (df + df.shift(4))`

    :param date_index:
        Name of the date-column for the financial data e.g. REPORT_DATE.

    :param group_index:
        If the DataFrames have a MultiIndex then group data using this
        index-column. By default this is TICKER but it could also be e.g.
        SIMFIN_ID if you are using that as an index in your DataFrame.

    :return:
        Pandas DataFrame with growth signals, columns sorted by name.
    """

    # The growth-rates are calculated with rel_change(), which means that
    # several groupby operations are performed. According to the original
    # author this is easier to implement, and for large DataFrames only
    # about 10% slower than using apply() with a function like the helper
    # _signals() in fin_signals().

    def _combine(df_income, df_cashflow, df_balance):
        # Select the data we need and gather it in a single DataFrame.
        parts = [df_income[[REVENUE, NET_INCOME]],
                 free_cash_flow(df_cashflow),
                 df_balance[[TOTAL_ASSETS]]]
        return pd.concat(parts, axis=1)

    def _growth(df_data, quarters, signal_names):
        # Growth-rates over the given number of quarters. The signal-names
        # are listed in the order: Revenue, Net Income, FCF, Total Assets.
        source_names = [REVENUE, NET_INCOME, FCF, TOTAL_ASSETS]
        new_names = dict(zip(source_names, signal_names))
        return rel_change(df=df_data, freq='q', quarters=quarters,
                          future=False, annualized=False,
                          new_names=new_names)

    # Annual growth using TTM data.
    df_ttm = _combine(df_income_ttm, df_cashflow_ttm, df_balance_ttm)
    df_growth = _growth(
        df_ttm, 4,
        [SALES_GROWTH, EARNINGS_GROWTH, FCF_GROWTH, ASSETS_GROWTH])

    # Year-Over-Year growth using Quarterly data.
    df_qrt = _combine(df_income_qrt, df_cashflow_qrt, df_balance_qrt)
    df_growth_yoy = _growth(
        df_qrt, 4,
        [SALES_GROWTH_YOY, EARNINGS_GROWTH_YOY,
         FCF_GROWTH_YOY, ASSETS_GROWTH_YOY])

    # Quarter-Over-Quarter growth, using the same Quarterly data as above.
    df_growth_qoq = _growth(
        df_qrt, 1,
        [SALES_GROWTH_QOQ, EARNINGS_GROWTH_QOQ,
         FCF_GROWTH_QOQ, ASSETS_GROWTH_QOQ])

    # Gather all the signals in a single DataFrame.
    df_signals = pd.concat([df_growth, df_growth_yoy, df_growth_qoq], axis=1)

    # Optionally lag the index-dates of the signals.
    if offset is not None:
        df_signals = add_date_offset(df=df_signals, offset=offset,
                                     date_index=date_index)

    # Optional user-processing of the signals e.g. to calculate averages.
    if func is not None:
        df_signals = apply(df=df_signals, func=func, group_index=group_index)

    # Optionally reindex to the same data-points as the share-prices.
    if df_prices is not None:
        df_signals = reindex(df_src=df_signals, df_target=df_prices,
                             method=fill_method, group_index=group_index)

    # Order the columns alphabetically by name.
    df_signals.sort_index(axis='columns', inplace=True)

    return df_signals
def fin_signals(df_income_ttm, df_balance_ttm, df_prices=None,
                offset=None, func=None, fill_method='ffill',
                date_index=REPORT_DATE, group_index=TICKER):
    """
    Compute financial signals such as Net Profit Margin, Debt Ratio, ROA,
    etc. for all the stocks in the given DataFrames.

    .. note:: The original documentation describes this function as being
        used through the :obj:`~simfin.cache.cache` wrapper, which is not
        visible in this block (TODO confirm). When wrapped, passing the arg
        `cache_refresh` makes it save the result in a cache-file, which is
        loaded on later calls if it is more recent than specified by
        `cache_refresh`, otherwise the result is computed and saved again.
        See the documentation for that wrapper for details on its arguments.

    .. warning:: When the cache-wrapper is in effect you **MUST** use
        keyword arguments to this function, otherwise the first unnamed
        arguments would get passed to the :obj:`~simfin.cache.cache`
        wrapper instead.

    :param df_income_ttm:
        Pandas DataFrame with Income Statement TTM data for one or more
        stocks.

    :param df_balance_ttm:
        Pandas DataFrame with Balance Sheet TTM data for one or more stocks.

    :param df_prices:
        Optional Pandas DataFrame with share-prices for one or more stocks.
        If not `None`, then the signals will be reindexed to the same daily
        data-points as `df_prices`, otherwise the signals will be quarterly.

    :param offset:
        Pandas DateOffset added to the date-index of the Pandas DataFrames
        with the financial data. Example: `pd.DateOffset(days=60)`
        This is useful if you want to add a lag of e.g. 60 days to the dates
        of financial reports with Income Statements and Balance Sheets,
        because the REPORT_DATE is not when it was actually made available
        to the public, which can be 1, 2 or even 3 months after the
        REPORT_DATE.
        See :obj:`~simfin.utils.add_date_offset` for more details.

    :param func:
        Function to apply on a per-stock basis after the signals have been
        calculated, but before they have been reindexed to daily data-points.
        This is useful e.g. to calculate multi-year averages.
        For example, to calculate the 2-year averages of TTM data:
        `func = lambda df: 0.5 * (df + df.shift(4))`

    :param fill_method:
        String or callable for the method of filling in empty values when
        reindexing financial data to daily data-points.
        See :obj:`~simfin.resample.reindex` for valid options.

    :param date_index:
        Name of the date-column for the financial data e.g. REPORT_DATE.

    :param group_index:
        If the DataFrames have a MultiIndex then group data using this
        index-column. By default this is TICKER but it could also be e.g.
        SIMFIN_ID if you are using that as an index in your DataFrame.

    :return:
        Pandas DataFrame with financial signals, columns sorted by name.
    """

    def _signals(df_grp):
        # Signals for the data of a single stock.

        # Numbers that are used by several of the signals.
        net_income = df_grp[NET_INCOME]
        revenue = df_grp[REVENUE]
        total_assets = df_grp[TOTAL_ASSETS]

        # Creating the DataFrame with its index up-front improves performance.
        df_out = pd.DataFrame(index=df_grp.index)

        # Net Profit Margin = Net Income / Revenue.
        df_out[NET_PROFIT_MARGIN] = net_income / revenue

        # Gross Profit Margin = Gross Profit / Revenue.
        df_out[GROSS_PROFIT_MARGIN] = df_grp[GROSS_PROFIT] / revenue

        # Interest Coverage = Operating Income / Interest Expense.
        # INTEREST_EXP_NET must be negated.
        df_out[INTEREST_COV] = \
            df_grp[OPERATING_INCOME] / -df_grp[INTEREST_EXP_NET]

        # Current Ratio = Current Assets / Current Liabilities.
        df_out[CURRENT_RATIO] = \
            df_grp[TOTAL_CUR_ASSETS] / df_grp[TOTAL_CUR_LIAB]

        # Debt Ratio = (Short-term Debt + Long-term Debt) / Total Assets.
        total_debt = df_grp[ST_DEBT] + df_grp[LT_DEBT]
        df_out[DEBT_RATIO] = total_debt / total_assets

        # NOTE: ROA, ROE, ASSET_TURNOVER, etc. can be calculated in different
        # ways. See Tutorial 04. For example, the Assets or Equity from last
        # year could be used instead of from the current year, but the
        # results are usually very similar, and using last year's numbers
        # would cause us to lose one year of data-points for the signals.

        # Return on Assets = Net Income / Total Assets.
        df_out[ROA] = net_income / total_assets

        # Return on Equity = Net Income / Total Equity.
        df_out[ROE] = net_income / df_grp[TOTAL_EQUITY]

        # Asset Turnover = Revenue / Total Assets.
        df_out[ASSET_TURNOVER] = revenue / total_assets

        return df_out

    # Columns needed from each kind of financial report.
    income_cols = [REVENUE, GROSS_PROFIT, OPERATING_INCOME,
                   INTEREST_EXP_NET, NET_INCOME]
    balance_cols = [TOTAL_ASSETS, TOTAL_CUR_ASSETS, TOTAL_CUR_LIAB,
                    TOTAL_EQUITY, ST_DEBT, LT_DEBT]

    # Gather the data side-by-side in a single DataFrame.
    df_fin = pd.concat([df_income_ttm[income_cols],
                        df_balance_ttm[balance_cols]], axis=1)

    # Optionally lag the index-dates of the financial data.
    if offset is not None:
        df_fin = add_date_offset(df=df_fin, offset=offset,
                                 date_index=date_index)

    # The helper `apply` handles grouping when there are multiple stocks.
    df_signals = apply(df=df_fin, func=_signals, group_index=group_index)

    # Optional user-processing of the signals e.g. to calculate averages.
    if func is not None:
        df_signals = apply(df=df_signals, func=func, group_index=group_index)

    # Optionally reindex to the same data-points as the share-prices.
    if df_prices is not None:
        df_signals = reindex(df_src=df_signals, df_target=df_prices,
                             method=fill_method, group_index=group_index)

    # Order the columns alphabetically by name.
    df_signals.sort_index(axis='columns', inplace=True)

    return df_signals
def price_signals(df_prices, group_index=TICKER):
    """
    Compute price-signals such as Moving Average and MACD, for all the
    stocks in the given DataFrame. All signals use the closing share-price.

    .. note:: The original documentation describes this function as being
        used through the :obj:`~simfin.cache.cache` wrapper, which is not
        visible in this block (TODO confirm). When wrapped, passing the arg
        `cache_refresh` makes it save the result in a cache-file, which is
        loaded on later calls if it is more recent than specified by
        `cache_refresh`, otherwise the result is computed and saved again.
        See the documentation for that wrapper for details on its arguments.

    .. warning:: When the cache-wrapper is in effect you **MUST** use
        keyword arguments to this function, otherwise the first unnamed
        arguments would get passed to the :obj:`~simfin.cache.cache`
        wrapper instead.

    :param df_prices:
        Pandas DataFrame with share-prices for multiple stocks.
        The column CLOSE is used.

    :param group_index:
        If the DataFrame has a MultiIndex then group data using this
        index-column. By default this is TICKER but it could also be e.g.
        SIMFIN_ID if you are using that as an index in your DataFrame.

    :return:
        Pandas DataFrame with price-signals, columns sorted by name.
    """

    def _signals(df_grp):
        # Signals for the share-prices of a single stock.
        close = df_grp[CLOSE]

        def _ema(series, span):
            # Exponential Moving Average with the given span.
            return series.ewm(span=span).mean()

        # Creating the DataFrame with its index up-front improves performance.
        df_out = pd.DataFrame(index=df_grp.index)

        # Simple Moving Averages for the past 20 and 200 data-points.
        df_out[MAVG_20] = close.rolling(window=20).mean()
        df_out[MAVG_200] = close.rolling(window=200).mean()

        # Exponential Moving Average with a span of 20.
        df_out[EMA] = _ema(close, 20)

        # Moving Average Convergence Divergence with spans of 12 and 26.
        # https://en.wikipedia.org/wiki/MACD
        macd = _ema(close, 12) - _ema(close, 26)
        df_out[MACD] = macd

        # MACD with extra smoothing by an Exp. Moving Average with span 9.
        df_out[MACD_EMA] = _ema(macd, 9)

        return df_out

    # The helper `apply` handles grouping when there are multiple stocks.
    df_signals = apply(df=df_prices, func=_signals, group_index=group_index)

    # Order the columns alphabetically by name.
    df_signals.sort_index(axis='columns', inplace=True)

    return df_signals
def volume_signals(df_prices, df_shares, window=20, fill_method='ffill',
                   offset=None, date_index=REPORT_DATE,
                   shares_index=SHARES_BASIC, group_index=TICKER):
    """
    Compute signals for the daily trading-volume of stocks:

    - REL_VOL: Logarithm of the daily trading-volume divided by its
      moving average.
    - VOLUME_MCAP: Moving average of the daily trading-volume multiplied by
      the closing share-price.
    - VOLUME_TURNOVER: Moving average of the daily trading-volume divided by
      the number of shares outstanding.

    The moving average enters the signals in different ways. For REL_VOL it
    is a part of the formula definition. For VOLUME_MCAP and VOLUME_TURNOVER
    the moving average is calculated afterwards.

    .. note:: The original documentation describes this function as being
        used through the :obj:`~simfin.cache.cache` wrapper, which is not
        visible in this block (TODO confirm). When wrapped, passing the arg
        `cache_refresh` makes it save the result in a cache-file, which is
        loaded on later calls if it is more recent than specified by
        `cache_refresh`, otherwise the result is computed and saved again.
        See the documentation for that wrapper for details on its arguments.

    .. warning:: When the cache-wrapper is in effect you **MUST** use
        keyword arguments to this function, otherwise the first unnamed
        arguments would get passed to the :obj:`~simfin.cache.cache`
        wrapper instead.

    :param df_prices:
        Pandas DataFrame with share-prices for multiple stocks.
        The columns CLOSE and VOLUME are used.

    :param df_shares:
        Pandas DataFrame with both columns SHARES_BASIC and SHARES_DILUTED
        e.g. `df_shares=df_income_ttm`

    :param window:
        Integer for the number of days to use in moving-average calculations.

    :param fill_method:
        String or callable for the method of filling in empty values when
        reindexing financial data to daily data-points.
        See :obj:`~simfin.resample.reindex` for valid options.

    :param offset:
        Pandas DateOffset added to the date-index of `df_shares`.
        Example: `pd.DateOffset(days=60)`
        See :obj:`~simfin.utils.add_date_offset` for more details.

    :param date_index:
        Name of the date-column for `df_shares` e.g. REPORT_DATE.

    :param shares_index:
        Name of the column for share-counts in `df_shares`. SHARES_DILUTED
        takes the potential diluting impact of stock-options into account,
        while SHARES_BASIC does not take potential dilution into account.

    :param group_index:
        If the DataFrame has a MultiIndex then group data using this
        index-column. By default this is TICKER but it could also be e.g.
        SIMFIN_ID if you are using that as an index in your DataFrame.

    :return:
        Pandas DataFrame with volume-signals, columns sorted by name.
    """

    # Take the requested share-counts (e.g. SHARES_BASIC) and fill in
    # missing values with the other share-counts (e.g. SHARES_DILUTED).
    df_shares = shares(df=df_shares, index=shares_index)

    def _mavg(series):
        # Moving average over the given window.
        return series.rolling(window=window).mean()

    def _signals(df_grp):
        # Signals for the data of a single stock.
        close = df_grp[CLOSE]
        volume = df_grp[VOLUME]

        # Share-counts from financial reports, reindexed to daily data-points.
        shares_daily = df_grp[shares_index]

        # Creating the DataFrame with its index up-front improves performance.
        df_out = pd.DataFrame(index=df_grp.index)

        # Log of the trading-volume relative to its moving average.
        df_out[REL_VOL] = np.log(volume / _mavg(volume))

        # Moving average of the Market-Cap of the daily trading-volume.
        df_out[VOLUME_MCAP] = _mavg(volume * close)

        # Moving average of the Volume Turnover, which is the daily
        # trading-volume divided by the number of shares outstanding.
        df_out[VOLUME_TURNOVER] = _mavg(volume / shares_daily)

        return df_out

    # Optionally lag the dates of the share-counts.
    if offset is not None:
        df_shares = add_date_offset(df=df_shares, offset=offset,
                                    date_index=date_index)

    # Reindex the share-counts to the data-points of the share-prices.
    df_shares_daily = reindex(df_src=df_shares, df_target=df_prices,
                              method=fill_method, group_index=group_index)

    # Gather the prices, volumes and share-counts in a single DataFrame.
    df_all = pd.concat([df_prices[[CLOSE, VOLUME]], df_shares_daily], axis=1)

    # The helper `apply` handles grouping when there are multiple stocks.
    df_signals = apply(df=df_all, func=_signals, group_index=group_index)

    # Order the columns alphabetically by name.
    df_signals.sort_index(axis='columns', inplace=True)

    return df_signals
def reindex(df_src, df_target, group_index=TICKER, union=True,
            only_target_index=True, method=None, **kwargs):
    """
    Conform a source Pandas DataFrame or Series to the index of a target
    DataFrame or Series. Both must have either a DatetimeIndex or a
    MultiIndex.

    This works for the financial data of a single company as well as for
    data covering several companies in one DataFrame.

    Unlike the :obj:`~simfin.resample.resample` function, the result gets
    the same index as the target. That is handy e.g. when upsampling annual
    or quarterly financial data to daily data-points that line up with the
    share-prices, also beyond the last date of the financial data.

    The default is to reindex on the union of the source and target
    indices, as data-points from the source could otherwise be lost when
    their dates are absent from the target. With `only_target_index=True`
    the result is afterwards reduced to the index of the target only.
    See `Tutorial 02`_ on resampling for a more detailed explanation.

    :param df_src:
        Pandas DataFrame or Series assumed to have either a DatetimeIndex
        or a MultiIndex with 2 indices, one of which is a DatetimeIndex
        and the other is given by the arg `group_index`.

    :param df_target:
        Pandas DataFrame or Series assumed to have an index of the same
        type as `df_src`. Both can have a DatetimeIndex, or both can have
        a MultiIndex with 2 indices of which one must be a DatetimeIndex.
        The names of the indices are allowed to differ.

    :param group_index:
        If `df_src` and `df_target` have a MultiIndex then group data using
        this index-column. By default this is TICKER but it could also be
        e.g. SIMFIN_ID if you are using that as an index in your DataFrame.

    :param union:
        Boolean. If True then reindex using the union of the indices of
        `df_src` and `df_target`. If False then only use the index of
        `df_target`.

    :param only_target_index:
        Boolean which is only used if `union==True`. If True then an extra
        reindex operation makes the final index match the index of
        `df_target`. Otherwise the result might contain rows from `df_src`
        that do not exist in the index of `df_target`.

    :param method:
        String or callable for the method of filling in empty values.
        Do not pass a summarizing method such as the string 'mean' or a
        similar lambda-function. Only use filling methods such as
        forward-fill or interpolation. Valid options:

        - 'ffill' is forward-fill with last known values.
        - 'bfill' is backward-fill using future values.
        - 'linear' is linear interpolation between known values.
        - 'quadratic' is quadratic interpolation between known values.

        Can also be a callable function or lambda-function which is called
        after the reindexing, e.g.:
        `method=lambda x: x.interpolate(method='nearest')`

    :param **kwargs:
        Optional keyword-arguments passed directly to Pandas `reindex`
        function. Valid arguments:
        https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.reindex.html

    :return:
        Resampled DataFrame or Series.
    """

    assert isinstance(df_src, (pd.DataFrame, pd.Series))
    assert isinstance(df_target, (pd.DataFrame, pd.Series))

    # Only a shallow comparison of the index-types. Two MultiIndex pass
    # this check even if their levels are different.
    src_index_type = type(df_src.index)
    target_index_type = type(df_target.index)
    assert (src_index_type == target_index_type)

    # Turn the arg `method` into a callable that is applied to the
    # reindexed data. E.g. with fill_func = lambda x: x.ffill() we get
    # that fill_func(df.reindex()) is the same as df.reindex().ffill()
    fill_func = _convert_method_arg(method=method)

    # Reindex on either the union of the source and target indices,
    # or on the index of the target alone.
    new_index = index_union(df_src=df_src, df_target=df_target) \
        if union else df_target.index

    # Reindex the source-data, which works for both DatetimeIndex and
    # MultiIndex. Then fill in the empty values, using groupby if the
    # data contains multiple stocks.
    df_result = apply(df=df_src.reindex(index=new_index, **kwargs),
                      func=fill_func, group_index=group_index)

    # No more work to do unless the union of the indices was used and
    # the result must be limited to the rows of df_target.
    if not (union and only_target_index):
        return df_result

    # Extra reindex so the result only has the rows from df_target.
    return df_result.reindex(index=df_target.index)
def resample(df, rule, method='ffill', group_index=TICKER, **kwargs):
    """
    Change the sampling frequency of a Pandas DataFrame or Series that has
    either a DatetimeIndex or a MultiIndex.

    This works for the financial data of a single company as well as for
    data covering several companies in one DataFrame.

    The :obj:`~simfin.resample.asfreq` function only supports forward- and
    backward-fill. This function instead supports arbitrary functions for
    filling and summarization, given either as string keywords for the
    most common cases, or as user-supplied functions.

    :param df:
        Pandas DataFrame or Series assumed to have either a DatetimeIndex
        or a MultiIndex with 2 indices, one of which is a DatetimeIndex
        and the other is given by the arg `group_index`.

    :param rule:
        Resampling frequency e.g. 'D' for daily.

        This is passed directly to the Pandas function which has more
        options:
        https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects

    :param method:
        String or callable for the method of filling in empty values.
        Valid options:

        - 'ffill' is forward-fill with last known values.
        - 'bfill' is backward-fill using future values.
        - 'linear' is linear interpolation between known values.
        - 'quadratic' is quadratic interpolation between known values.
        - 'mean' is averaging for use when downsampling.

        Can also be a callable function or lambda-function which is called
        after the resampling, e.g.: `method=lambda x: x.nearest(limit=100)`

    :param group_index:
        If `df` has a MultiIndex then group data using this index-column.
        By default this is TICKER but it could also be e.g. SIMFIN_ID if
        you are using that as an index in your DataFrame.

    :param **kwargs:
        Optional keyword-arguments passed directly to Pandas resample
        function. Valid arguments:
        https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.resample.html

    :return:
        Resampled DataFrame or Series.
    """

    # Turn the arg `method` into a callable that is applied to the
    # resampled data. E.g. with fill_func = lambda x: x.ffill() we get
    # that fill_func(df.resample()) is the same as df.resample().ffill()
    fill_func = _convert_method_arg(method=method)

    # Helper-function for resampling the data of a single stock.
    def _resample_single(df_grp):
        resampler = df_grp.resample(rule=rule, **kwargs)
        return fill_func(resampler)

    # Run the helper-function, using groupby if there are multiple stocks.
    return apply(df=df, func=_resample_single, group_index=group_index)