Esempio n. 1
0
def test_status_calc():
    """
    Test the status_calc function which generates training labels
    """
    assert utils.status_calc(50, 20, 12.2) == 1
    assert utils.status_calc(12.003, 10, 15) == 0
    assert utils.status_calc(-10, -30, 5) == 1
    assert utils.status_calc(-31, -30, 15) == 0
    assert utils.status_calc(15, 5, 10) == 1

    with pytest.raises(ValueError):
        utils.status_calc(12, 10, -3)
Esempio n. 2
0
def backtest():
    data_df = pd.read_csv("keystats.csv", index_col='Date')
    data_df.dropna(axis=0, how='any', inplace=True)

    features = data_df.columns[6:]
    X = data_df[features].values

    y = list(
        status_calc(data_df["stock_p_change"],
                    data_df["SP500_p_change"],
                    outperformance=float(config['settings']['outperform'])))

    z = np.array(data_df[["stock_p_change", "SP500_p_change"]])

    X_train, X_test, y_train, y_test, z_train, z_test = train_test_split(
        X, y, z, test_size=float(config['settings']['test_size']))

    clf = RandomForestClassifier(n_estimators=100, random_state=0)
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)
    print("Classifier performance\n", "=" * 20)
    print(f"Accuracy score: {clf.score(X_test, y_test): .2f}")
    print(f"Precision score: {precision_score(y_test, y_pred): .2f}")

    num_positive_predictions = sum(y_pred)
    if num_positive_predictions < 0:
        print(colored('[Error:] ', 'red') + "No stocks predicted!")

    stock_returns = 1 + z_test[y_pred, 0] / 100
    market_returns = 1 + z_test[y_pred, 1] / 100

    avg_predicted_stock_growth = sum(stock_returns) / num_positive_predictions
    index_growth = sum(market_returns) / num_positive_predictions
    percentage_stock_returns = 100 * (avg_predicted_stock_growth - 1)
    percentage_market_returns = 100 * (index_growth - 1)
    total_outperformance = percentage_stock_returns - percentage_market_returns

    print("\n Stock prediction performance report \n", "=" * 40)
    print(f"Total Trades:", num_positive_predictions)
    print(
        f"Average return for stock predictions: {percentage_stock_returns: .1f} %"
    )
    print(
        f"Average market return in the same period: {percentage_market_returns: .1f}% "
    )
    print(
        f"Compared to the index, our strategy earns {total_outperformance: .1f} percentage points more"
    )
Esempio n. 3
0
def build_data_set():
    """
    Reads the keystats.csv file and prepares it for scikit-learn
    :return: X_train and y_train numpy arrays
    """
    training_data = pd.read_csv("keystats.csv", index_col='Date')
    training_data.dropna(axis=0, how='any', inplace=True)
    features = training_data.columns[6:]

    X_train = training_data[features].values
    # Generate the labels: '1' if a stock beats the S&P500 by more than 10%, else '0'.
    y_train = list(
        status_calc(training_data["stock_p_change"],
                    training_data["SP500_p_change"], OUTPERFORMANCE))

    return X_train, y_train
Esempio n. 4
0
def backtest():
    """
    A simple backtest, which splits the dataset into a train set and test set,
    then fits a Random Forest classifier to the train set. We print the precision and accuracy
    of the classifier on the test set, then run a backtest comparing this strategy's performance
    to passive investment in the S&P500.
    Please note that there is a methodological flaw in this backtest which will give deceptively
    good results, so the results here should not encourage you to live trade.
    """
    # Build the dataset, and drop any rows with missing values
    data_df = pd.read_csv("keystats.csv", index_col="Date")
    data_df.dropna(axis=0, how="any", inplace=True)

    features = data_df.columns[6:]
    X = data_df[features].values

    # The labels are generated by applying the status_calc to the dataframe.
    # '1' if a stock beats the S&P500 by more than x%, else '0'. Here x is the
    # outperformance parameter, which is set to 10 by default but can be redefined.
    y = list(
        status_calc(
            data_df["stock_p_change"], data_df["SP500_p_change"], outperformance=10
        )
    )

    # z is required for us to track returns
    z = np.array(data_df[["stock_p_change", "SP500_p_change"]])

    # Generate the train set and test set by randomly splitting the dataset
    X_train, X_test, y_train, y_test, z_train, z_test = train_test_split(
        X, y, z, test_size=0.2
    )

    # Instantiate a RandomForestClassifier with 100 trees, then fit it to the training data
    clf = RandomForestClassifier(n_estimators=100, random_state=0)
    clf.fit(X_train, y_train)

    # Generate the predictions, then print test set accuracy and precision
    y_pred = clf.predict(X_test)
    print("Classifier performance\n", "=" * 20)
    print(f"Accuracy score: {clf.score(X_test, y_test): .2f}")
    print(f"Precision score: {precision_score(y_test, y_pred): .2f}")

    # Because y_pred is an array of 1s and 0s, the number of positive predictions
    # is equal to the sum of the array
    num_positive_predictions = sum(y_pred)
    if num_positive_predictions < 0:
        print("No stocks predicted!")

    # Recall that z_test stores the change in stock price in column 0, and the
    # change in S&P500 price in column 1.
    # Whenever a stock is predicted to outperform (y_pred = 1), we 'buy' that stock
    # and simultaneously `buy` the index for comparison.
    stock_returns = 1 + z_test[y_pred, 0] / 100
    market_returns = 1 + z_test[y_pred, 1] / 100

    # Calculate the average growth for each stock we predicted 'buy'
    # and the corresponding index growth
    avg_predicted_stock_growth = sum(stock_returns) / num_positive_predictions
    index_growth = sum(market_returns) / num_positive_predictions
    percentage_stock_returns = 100 * (avg_predicted_stock_growth - 1)
    percentage_market_returns = 100 * (index_growth - 1)
    total_outperformance = percentage_stock_returns - percentage_market_returns

    print("\n Stock prediction performance report \n", "=" * 40)
    print(f"Total Trades:", num_positive_predictions)
    print(f"Average return for stock predictions: {percentage_stock_returns: .1f} %")
    print(
        f"Average market return in the same period: {percentage_market_returns: .1f}% "
    )
    print(
        f"Compared to the index, our strategy earns {total_outperformance: .1f} percentage points more"
    )