def add_fundamental_features(self, prices, features):
        """
        Add percentile-rank features derived from Sharadar fundamentals.

        Entries written to ``features`` (each ranked along ``axis=1`` as a
        percentile, with missing ranks replaced by 0.5):

        - ``enterprise_multiples_ranks``: EVEBIT from the ART dimension,
          kept only where EBIT is greater than zero
        - ``<FIELD>_ranks`` for every field returned by the quarterly (ARQ)
          query
        - ``<FIELD>_ranks`` for every field returned by the
          trailing-twelve-month (ART) query

        ``prices`` must support ``prices.loc["Close"]``; ``features`` must
        support item assignment. Nothing is returned.
        """

        close_prices = prices.loc["Close"]

        def percentile_ranks(frame):
            # Rank along axis=1; cells that could not be ranked (NaN) are
            # assigned the midpoint so they are neither favored nor penalized.
            ranked = frame.rank(axis=1, pct=True)
            return ranked.fillna(0.5)

        # --- Enterprise multiple -------------------------------------------
        ev_fundamentals = get_sharadar_fundamentals_reindexed_like(
            close_prices, fields=["EVEBIT", "EBIT"], dimension="ART")
        ev_to_ebit = ev_fundamentals.loc["EVEBIT"]
        has_positive_ebit = ev_fundamentals.loc["EBIT"] > 0
        # Multiples computed on non-positive earnings are masked out
        features["enterprise_multiples_ranks"] = percentile_ranks(
            ev_to_ebit.where(has_positive_ebit))

        # --- Remaining fields, grouped by reporting dimension --------------
        # Order matters: quarterly features are added before the
        # trailing-twelve-month ones.
        fields_by_dimension = (
            # As-reported quarterly reports
            ("ARQ", [
                "CURRENTRATIO",  # Current ratio
                "DE",  # Debt to Equity Ratio
                "PB",  # Price to Book Value
                "TBVPS",  # Tangible Asset Book Value per Share
                "MARKETCAP",
            ]),
            # As-reported trailing-twelve-month reports
            ("ART", [
                "ASSETTURNOVER",  # Asset Turnover
                "EBITDAMARGIN",  # EBITDA Margin
                "EQUITYAVG",  # Average Equity
                "GROSSMARGIN",  # Gross Margin
                "NETMARGIN",  # Profit Margin
                "PAYOUTRATIO",  # Payout Ratio
                "PE",  # Price Earnings Damodaran Method
                "PE1",  # Price to Earnings Ratio
                "PS",  # Price Sales (Damodaran Method)
                "PS1",  # Price to Sales Ratio
                "ROA",  # Return on Average Assets
                "ROE",  # Return on Average Equity
                "ROS",  # Return on Sales
            ]),
        )

        for dimension, requested_fields in fields_by_dimension:
            dimension_fundamentals = get_sharadar_fundamentals_reindexed_like(
                close_prices,
                dimension=dimension,
                fields=requested_fields)

            # Iterate over the fields actually present in the result
            returned_fields = dimension_fundamentals.index.get_level_values(
                "Field").unique()
            for field in returned_fields:
                feature_name = "{}_ranks".format(field)
                features[feature_name] = percentile_ranks(
                    dimension_fundamentals.loc[field])
Ejemplo n.º 2
0
    def load_adjusted_array(self, domain, columns, dates, sids, mask):
        """
        Load Sharadar fundamentals for the requested columns.

        Columns are grouped by the ``dimension`` found in their dataset's
        ``extra_coords`` so that one fundamentals query is issued per
        dimension. When a query raises ``NoFundamentalData``, every column
        of that dimension is filled entirely with its dtype's missing value.

        ``domain`` and ``mask`` are accepted but not used by this method.

        Returns a dict mapping each requested column to an ``AdjustedArray``
        (built with no adjustments) whose rows follow ``dates`` and whose
        columns follow ``sids``.
        """
        # Empty dates x real-sids frame that tells the query what shape to
        # return; zipline sids are translated to real sids first.
        template = pd.DataFrame(
            None,
            index=dates,
            columns=[self.zipline_sids_to_real_sids[sid] for sid in sids])
        template.index.name = "Date"

        # Bucket the requested columns by fundamentals dimension
        grouped = defaultdict(list)
        for col in columns:
            grouped[col.dataset.extra_coords["dimension"]].append(col)

        arrays = {}

        for dimension, dimension_columns in grouped.items():

            # De-duplicate field names before querying
            unique_fields = list({col.name for col in dimension_columns})

            try:
                data = get_sharadar_fundamentals_reindexed_like(
                    template, fields=unique_fields, dimension=dimension)
            except NoFundamentalData:
                data = None

            for col in dimension_columns:
                fill_value = MISSING_VALUES_BY_DTYPE[col.dtype]

                if data is None:
                    # No data for this dimension: all-missing array
                    values = pd.DataFrame(fill_value,
                                          columns=template.columns,
                                          index=template.index).values
                elif col.dtype == datetime64ns_dtype:
                    # pd.to_datetime handles NaNs in pandas 0.22 while
                    # .astype(column.dtype) doesn't
                    field_data = data.loc[col.name]
                    values = field_data.apply(
                        pd.to_datetime).fillna(fill_value).values
                else:
                    field_data = data.loc[col.name]
                    values = field_data.astype(
                        col.dtype).fillna(fill_value).values

                arrays[col] = AdjustedArray(values,
                                            adjustments={},
                                            missing_value=fill_value)

        return arrays
Ejemplo n.º 3
0
    def prices_to_signals(self, prices):
        """
        Produce long signals (1/0) from successive percentile screens.

        Screens, each applied only to the survivors of the previous one:

        1. average dollar volume in the top ``DOLLAR_VOLUME_TOP_N_PCT``
        2. lowest ``VALUE_TOP_N_PCT`` by positive quarterly (ARQ) EVEBIT
        3. highest ``QUALITY_TOP_N_PCT`` by the score from
           ``self.get_f_scores``

        The symbols of the selected holdings are saved to results under
        "Holdings". Returns the boolean selection cast to int.
        """
        closes = prices.loc["Close"]
        volumes = prices.loc["Volume"]

        # Screen 1: liquidity, measured by rolling mean of close * volume
        rolling_dollar_volumes = (closes * volumes).rolling(
            self.DOLLAR_VOLUME_WINDOW).mean()
        liquidity_pct_ranks = rolling_dollar_volumes.rank(
            axis=1, ascending=False, pct=True)
        is_liquid = liquidity_pct_ranks <= (
            self.DOLLAR_VOLUME_TOP_N_PCT / 100)

        # Screen 2: value, measured by the EVEBIT enterprise multiple
        # (lower is cheaper, hence the ascending rank)
        value_data = get_sharadar_fundamentals_reindexed_like(
            closes,
            fields=["EVEBIT"],
            dimension="ARQ",
            domain=self.MASTER_DOMAIN)
        ev_to_ebit = value_data.loc["EVEBIT"]
        # Non-positive multiples are masked; negative ones indicate
        # negative earnings
        positive_ev_to_ebit = ev_to_ebit.where(ev_to_ebit > 0)
        # Rank only within the liquid universe
        value_pct_ranks = positive_ev_to_ebit.where(is_liquid).rank(
            axis=1, ascending=True, pct=True)
        is_value = value_pct_ranks <= (self.VALUE_TOP_N_PCT / 100)

        # Screen 3: quality, measured by F-Score (higher is better, hence
        # the descending rank), ranked only within the value stocks
        scores = self.get_f_scores(closes)
        quality_pct_ranks = scores.where(is_value).rank(
            axis=1, ascending=False, pct=True)
        long_signals = quality_pct_ranks <= (self.QUALITY_TOP_N_PCT / 100)

        # Record which symbols were selected, for later inspection
        symbols = prices.loc["Symbol"].reindex(closes.index, method="ffill")
        held_symbols = symbols.where(long_signals)
        self.save_to_results("Holdings", held_symbols)

        return long_signals.astype(int)
Ejemplo n.º 4
0
    def prices_to_signals(self, prices):
        """
        Produce long signals (1/0) from successive percentile screens.

        Screens, each applied only to the survivors of the previous one:

        1. average dollar volume in the top ``DOLLAR_VOLUME_TOP_N_PCT``
        2. lowest ``VALUE_TOP_N_PCT`` by trailing-twelve-month (ART) EVEBIT,
           considering only stocks with positive EBIT
        3. highest ``QUALITY_TOP_N_PCT`` by the score from
           ``self.get_f_scores``
        4. highest ``MOMENTUM_TOP_N_PCT`` by return between
           ``MOMENTUM_WINDOW`` and ``MOMENTUM_EXCLUDE_MOST_RECENT_WINDOW``
           rows ago
        5. highest ``SMOOTHEST_TOP_N_PCT`` by count of up days over the last
           ``MOMENTUM_WINDOW`` rows

        Returns the final boolean selection cast to int.
        """
        closes = prices.loc["Close"]
        volumes = prices.loc["Volume"]

        # Screen 1: liquidity, measured by rolling mean of close * volume
        rolling_dollar_volumes = (closes * volumes).rolling(
            self.DOLLAR_VOLUME_WINDOW).mean()
        liquidity_pct_ranks = rolling_dollar_volumes.rank(
            axis=1, ascending=False, pct=True)
        is_liquid = liquidity_pct_ranks <= (
            self.DOLLAR_VOLUME_TOP_N_PCT / 100)

        # Screen 2: value, measured by the EVEBIT enterprise multiple
        # (lower is cheaper, hence the ascending rank)
        value_data = get_sharadar_fundamentals_reindexed_like(
            closes,
            fields=["EVEBIT", "EBIT"],
            dimension="ART",
            domain=self.MASTER_DOMAIN)
        ev_to_ebit = value_data.loc["EVEBIT"]
        ebit = value_data.loc["EBIT"]
        # Mask multiples of companies without positive earnings
        ev_to_ebit = ev_to_ebit.where(ebit > 0)
        # Rank only within the liquid universe
        value_pct_ranks = ev_to_ebit.where(is_liquid).rank(
            axis=1, ascending=True, pct=True)
        is_value = value_pct_ranks <= (self.VALUE_TOP_N_PCT / 100)

        # Screen 3: quality, measured by F-Score (higher is better), ranked
        # only within the value stocks
        scores = self.get_f_scores(closes)
        quality_pct_ranks = scores.where(is_value).rank(
            axis=1, ascending=False, pct=True)
        is_quality_value = quality_pct_ranks <= (
            self.QUALITY_TOP_N_PCT / 100)

        # Screen 4: momentum, measured from MOMENTUM_WINDOW rows ago up to
        # MOMENTUM_EXCLUDE_MOST_RECENT_WINDOW rows ago
        start_closes = closes.shift(self.MOMENTUM_WINDOW)
        end_closes = closes.shift(self.MOMENTUM_EXCLUDE_MOST_RECENT_WINDOW)
        price_changes = end_closes - start_closes
        # Zero denominators become NaN to avoid DivisionByZero errors
        nonzero_start_closes = start_closes.where(start_closes != 0)
        momentum_returns = price_changes / nonzero_start_closes
        # Rank only within the high-quality value stocks
        momentum_pct_ranks = momentum_returns.where(is_quality_value).rank(
            axis=1, ascending=False, pct=True)
        has_momentum = momentum_pct_ranks <= (self.MOMENTUM_TOP_N_PCT / 100)

        # Screen 5: smoothness, measured by the number of rows with a
        # positive close-to-close change over the momentum window
        is_up_day = closes.pct_change() > 0
        up_day_counts = is_up_day.astype(int).rolling(
            self.MOMENTUM_WINDOW).sum()
        smoothness_pct_ranks = up_day_counts.where(has_momentum).rank(
            axis=1, ascending=False, pct=True)
        has_smooth_momentum = smoothness_pct_ranks <= (
            self.SMOOTHEST_TOP_N_PCT / 100)

        return has_smooth_momentum.astype(int)
Ejemplo n.º 5
0
    def get_f_scores(self, closes):
        """
        Compute Piotroski F-Scores shaped like ``closes``.

        Trailing-twelve-month (ART) Sharadar fundamentals are queried for
        ``self.MASTER_DOMAIN`` and nine boolean components are evaluated;
        the score is the number of components that are true (0-9):

        1. return on assets is positive
        2. operating cash flow is positive
        3. return on assets rose versus the previously reported period
        4. operating cash flow / total assets exceeds return on assets
        5. debt-to-equity fell versus the previously reported period
        6. current ratio rose versus the previously reported period
        7. weighted shares did not increase versus the previous period
        8. gross margin rose versus the previously reported period
        9. asset turnover rose versus the previously reported period

        Comparisons involving missing values evaluate to False and so
        contribute 0 to the score.

        The scores are also saved to results under "FScore".

        Returns a DataFrame of integer scores.
        """

        # Step 1: query relevant indicators
        fundamentals = get_sharadar_fundamentals_reindexed_like(
            closes,
            domain=self.MASTER_DOMAIN,
            dimension="ART",  # As-reported trailing twelve month reports
            fields=[
                "ROA",  # Return on assets
                "ASSETS",  # Total Assets
                "NCFO",  # Net Cash Flow from Operations
                "DE",  # Debt to Equity Ratio
                "CURRENTRATIO",  # Current ratio
                "SHARESWA",  # Outstanding shares
                "GROSSMARGIN",  # Gross margin
                "ASSETTURNOVER",  # Asset turnover
                "REPORTPERIOD"
            ])
        return_on_assets = fundamentals.loc["ROA"]
        total_assets = fundamentals.loc["ASSETS"]
        operating_cash_flows = fundamentals.loc["NCFO"]
        leverages = fundamentals.loc["DE"]
        current_ratios = fundamentals.loc["CURRENTRATIO"]
        shares_out = fundamentals.loc["SHARESWA"]
        gross_margins = fundamentals.loc["GROSSMARGIN"]
        asset_turnovers = fundamentals.loc["ASSETTURNOVER"]

        # Step 2: many Piotroski F-score components compare current to
        # previous values, so get DataFrames of previous values

        # Step 2.a: get a boolean mask of the first day of each newly
        # reported fiscal period
        fiscal_periods = fundamentals.loc["REPORTPERIOD"]
        are_new_fiscal_periods = fiscal_periods != fiscal_periods.shift()

        # Step 2.b: shift an indicator forward one fiscal period by
        # (1) shifting the values one row, (2) keeping only the ones that
        # fall on the first row of a newly reported fiscal period, and
        # (3) forward-filling. .ffill() is used instead of
        # .fillna(method="ffill"), which is deprecated in recent pandas.
        def previous_period(values):
            return values.shift().where(are_new_fiscal_periods).ffill()

        previous_return_on_assets = previous_period(return_on_assets)
        previous_leverages = previous_period(leverages)
        previous_current_ratios = previous_period(current_ratios)
        previous_shares_out = previous_period(shares_out)
        previous_gross_margins = previous_period(gross_margins)
        previous_asset_turnovers = previous_period(asset_turnovers)

        # Mask non-positive total assets so that dividing by them yields NaN
        # (component is False) rather than +/-inf, which would otherwise
        # award a spurious point when assets are reported as zero.
        total_assets = total_assets.where(total_assets > 0)

        # Step 3: calculate F-Score components; each resulting component is
        # a DataFrame of booleans
        components = [
            # positive return on assets
            return_on_assets > 0,
            # positive operating cash flow
            operating_cash_flows > 0,
            # increasing return on assets
            return_on_assets > previous_return_on_assets,
            # more cash flow than income (both scaled by assets)
            operating_cash_flows / total_assets > return_on_assets,
            # decreasing leverage
            leverages < previous_leverages,
            # increasing current ratio
            current_ratios > previous_current_ratios,
            # no new shares
            shares_out <= previous_shares_out,
            # increasing gross margin
            gross_margins > previous_gross_margins,
            # increasing asset turnover
            asset_turnovers > previous_asset_turnovers,
        ]

        # Step 4: convert the booleans to integers and sum to get the
        # F-Score (0-9)
        f_scores = sum(component.astype(int) for component in components)

        self.save_to_results("FScore", f_scores)
        return f_scores
    def add_quality_features(self, prices, features):
        """
        Add Piotroski F-Score based quality features to ``features``.

        Nine 0/1 component features are written (keys beginning with
        ``have_``), followed by ``f_score_ranks``: the percentile rank along
        ``axis=1`` of the sum of the components, with missing ranks set to
        0.5.

        Indicator values come from trailing-twelve-month (ART) reports.
        "Previous" values are those from 4 newly reported fiscal periods
        earlier, where new periods are detected from changes in the
        quarterly (ARQ) REPORTPERIOD field.
        NOTE(review): 4 quarterly periods presumably makes this a
        year-over-year comparison - confirm.

        Nothing is returned.
        """
        closes = prices.loc["Close"]

        # Step 1: query the indicators the F-Score is built from
        indicators = get_sharadar_fundamentals_reindexed_like(
            closes,
            dimension="ART",  # As-reported TTM reports
            fields=[
                "ROA",  # Return on assets
                "ASSETS",  # Total Assets
                "NCFO",  # Net Cash Flow from Operations
                "DE",  # Debt to Equity Ratio
                "CURRENTRATIO",  # Current ratio
                "SHARESWA",  # Outstanding shares
                "GROSSMARGIN",  # Gross margin
                "ASSETTURNOVER",  # Asset turnover
            ])
        roa = indicators.loc["ROA"]
        assets = indicators.loc["ASSETS"]
        cash_flows = indicators.loc["NCFO"]
        debt_to_equity = indicators.loc["DE"]
        current_ratio = indicators.loc["CURRENTRATIO"]
        shares = indicators.loc["SHARESWA"]
        gross_margin = indicators.loc["GROSSMARGIN"]
        asset_turnover = indicators.loc["ASSETTURNOVER"]

        # Step 2: build a mask that is True on the first row of each newly
        # reported fiscal period
        report_periods = get_sharadar_fundamentals_reindexed_like(
            closes,
            dimension="ARQ",  # As-reported quarterly reports
            fields=["REPORTPERIOD"]).loc["REPORTPERIOD"]
        is_new_period = report_periods != report_periods.shift()

        lookback_periods = 4

        def lagged_by_periods(sid_values):
            # Applied one column (sid) at a time; returns that column's
            # values as of `lookback_periods` reported periods earlier.
            period_starts = is_new_period[sid_values.name]
            # Keep one row per newly reported period...
            per_period = sid_values.where(period_starts).dropna()
            # ...step back the desired number of periods...
            lagged = per_period.shift(lookback_periods)
            # ...then restore the original index, forward-filling the gaps
            return lagged.reindex(sid_values.index, method="ffill")

        prior_roa = roa.apply(lagged_by_periods)
        prior_debt_to_equity = debt_to_equity.apply(lagged_by_periods)
        prior_current_ratio = current_ratio.apply(lagged_by_periods)
        prior_shares = shares.apply(lagged_by_periods)
        prior_gross_margin = gross_margin.apply(lagged_by_periods)
        prior_asset_turnover = asset_turnover.apply(lagged_by_periods)

        # Non-positive assets are masked to avoid DivisionByZero errors
        positive_assets = assets.where(assets > 0)

        # Step 3: the F-Score components, in the order they are stored;
        # each is a DataFrame of booleans
        components = [
            ("have_positive_return_on_assets",
             roa > 0),
            ("have_positive_operating_cash_flows",
             cash_flows > 0),
            ("have_increasing_return_on_assets",
             roa > prior_roa),
            ("have_more_cash_flow_than_incomes",
             cash_flows / positive_assets > roa),
            ("have_decreasing_leverages",
             debt_to_equity < prior_debt_to_equity),
            ("have_increasing_current_ratios",
             current_ratio > prior_current_ratio),
            ("have_no_new_shares",
             shares <= prior_shares),
            ("have_increasing_gross_margins",
             gross_margin > prior_gross_margin),
            ("have_increasing_asset_turnovers",
             asset_turnover > prior_asset_turnover),
        ]

        # Step 4: store each component as a 0/1 feature and accumulate the
        # running total, which is the F-Score
        f_scores = None
        for feature_name, flags in components:
            as_ints = flags.astype(int)
            features[feature_name] = as_ints
            if f_scores is None:
                f_scores = as_ints
            else:
                f_scores = f_scores + as_ints

        # Step 5: store the percentile rank of the F-Score
        features["f_score_ranks"] = f_scores.rank(axis=1, pct=True).fillna(0.5)