Beispiel #1
0
def get_knn_model_data(
    data: Union[pd.Series, pd.DataFrame],
    n_input_days: int,
    n_predict_days: int,
    n_neighbors: int,
    test_size: float,
    end_date: str,
    no_shuffle: bool,
) -> Tuple[pd.DataFrame, np.ndarray, np.ndarray, np.ndarray, Any]:
    """Fit a k-nearest-neighbors regressor on prepared data and forecast ahead

    The data is prepared by ``prepare_scale_train_valid_test``; a
    ``KNeighborsRegressor`` is fit on the training split, evaluated on the
    validation split and used to forecast from the latest input window.

    Parameters
    ----------
    data : Union[pd.Series, pd.DataFrame]
        Data to fit
    n_input_days : int
        Length of each input series
    n_predict_days : int
        Number of days to predict
    n_neighbors : int
        Number of neighbors used by the regressor
    test_size : float
        Fraction of data held out for testing
    end_date : str
        End date for backtesting
    no_shuffle : bool
        Flag to not shuffle train/test data

    Returns
    -------
    pd.DataFrame
        Forecast passed through ``scaler.inverse_transform`` and indexed by
        the dates returned from ``get_next_stock_market_days``
    np.ndarray
        Model predictions for the validation inputs
    np.ndarray
        Validation targets
    np.ndarray
        Dates belonging to the validation targets
    Any
        Scaler returned by the data preparation step

    Notes
    -----
    When data preparation flags an error, an empty dataframe, three
    zero-dimensional arrays and ``None`` are returned instead.
    """
    prepared = prepare_scale_train_valid_test(
        data, n_input_days, n_predict_days, test_size, end_date, no_shuffle
    )
    # The three discarded slots are outputs of the preparation step that this
    # function does not use.
    (X_train, X_valid, y_train, y_valid, _, _, _, y_dates_valid,
     forecast_data_input, dates_forecast_input, scaler, is_error) = prepared

    if is_error:
        return pd.DataFrame(), np.array(0), np.array(0), np.array(0), None

    # Forecast dates start after the last date of the forecast input window.
    last_input_date = dates_forecast_input[-1]
    future_dates = get_next_stock_market_days(
        last_input_date, n_next_days=n_predict_days
    )

    console.print(
        f"Training on {X_train.shape[0]} sequences of length {X_train.shape[1]}.  Using {X_valid.shape[0]} sequences "
        f" of length {X_valid.shape[1]} for validation"
    )

    # The regressor is fed 2-D arrays, so every split is reshaped to
    # (rows, columns) before use.
    regressor = neighbors.KNeighborsRegressor(n_neighbors=n_neighbors)
    train_features = X_train.reshape(X_train.shape[0], X_train.shape[1])
    train_targets = y_train.reshape(y_train.shape[0], y_train.shape[1])
    regressor.fit(train_features, train_targets)

    valid_features = X_valid.reshape(X_valid.shape[0], X_valid.shape[1])
    preds = regressor.predict(valid_features)

    # Forecast from the single most recent window, then undo the scaling.
    scaled_forecast = regressor.predict(forecast_data_input.reshape(1, -1))
    forecast_values = scaler.inverse_transform(scaled_forecast.reshape(1, -1))
    forecast_data_df = pd.DataFrame(list(forecast_values.T),
                                    index=future_dates)

    return forecast_data_df, preds, y_valid, y_dates_valid, scaler
Beispiel #2
0
def conv1d(other_args: List[str], s_ticker: str, df_stock: pd.DataFrame):
    """
    Train a 1D Convolutional Neural Net (1D CNN) and display its forecast

    The "Adj Close" column is prepared with ``prepare_scale_train_valid_test``.
    A fresh model is then built, compiled and fit once per loop. Validation
    predictions and forecasts from every loop are collected; the forecast is
    printed and the median of the validation predictions is plotted.

    Any exception is printed together with its traceback, and
    ``restore_env()`` runs on every exit path.

    Parameters
    ----------
    other_args: List[str]
        Argparse arguments
    s_ticker: str
        Stock ticker, used in the plot title
    df_stock: pd.DataFrame
        Dataframe of stock prices; the "Adj Close" column is used
    """
    try:
        ns_parser = parse_args(
            prog="conv1d",
            description="""1D CNN.""",
            other_args=other_args,
        )
        if not ns_parser:
            return

        prepared = prepare_scale_train_valid_test(df_stock["Adj Close"],
                                                  ns_parser)
        # Three of the returned values are not needed here and are discarded.
        (X_train, X_valid, y_train, y_valid, _, _, _, y_dates_valid,
         forecast_data_input, dates_forecast_input, scaler,
         is_error) = prepared
        if is_error:
            return

        print(
            f"Training on {X_train.shape[0]} sequences of length {X_train.shape[1]}.  Using {X_valid.shape[0]} sequences "
            f" of length {X_valid.shape[1]} for validation. Model will run {ns_parser.n_loops} loops"
        )

        n_loops = ns_parser.n_loops
        n_days = ns_parser.n_days
        future_dates = get_next_stock_market_days(dates_forecast_input[-1],
                                                  n_next_days=n_days)

        # One slice per training loop: validation predictions and forecasts.
        n_valid = X_valid.shape[0]
        preds = np.zeros((n_loops, n_valid, n_days))
        forecast_data = np.zeros((n_loops, n_days))

        for run in range(n_loops):
            # Build Neural Network model (a new one for every loop)
            model = build_neural_network_model(
                cfg_nn_models.Convolutional,
                ns_parser.n_inputs,
                n_days,
            )
            optimizer = optimizers[cfg_nn_models.Optimizer](
                learning_rate=ns_parser.lr)
            model.compile(optimizer=optimizer, loss=cfg_nn_models.Loss)

            # The network is fed inputs with a trailing axis of length 1.
            train_inputs = X_train.reshape(X_train.shape[0],
                                           X_train.shape[1], 1)
            valid_inputs = X_valid.reshape(n_valid, X_valid.shape[1], 1)
            model.fit(
                train_inputs,
                y_train,
                epochs=ns_parser.n_epochs,
                verbose=True,
                batch_size=ns_parser.n_batch_size,
                validation_data=(valid_inputs, y_valid),
                callbacks=[es],
            )

            valid_output = model.predict(valid_inputs)
            preds[run] = valid_output.reshape(n_valid, n_days)

            loop_forecast = forecast(forecast_data_input, future_dates,
                                     model, scaler)
            forecast_data[run] = loop_forecast.values.flat

        # Rows are future dates, columns are the individual loops.
        forecast_data_df = pd.DataFrame(forecast_data.T, index=future_dates)
        if n_loops > 1:
            forecast_data_df["Median"] = forecast_data_df.median(axis=1)
            headline = forecast_data_df["Median"]
        else:
            headline = forecast_data_df[0]
        print_pretty_prediction(headline, df_stock["Adj Close"].values[-1])

        plot_data_predictions(
            df_stock,
            np.median(preds, axis=0),
            y_valid,
            y_dates_valid,
            scaler,
            f"Conv1D Model on {s_ticker}",
            forecast_data_df,
            n_loops,
        )
        print("")

    except Exception as e:
        print(e)
        traceback.print_exc()
        print("")

    finally:
        restore_env()
Beispiel #3
0
def conv1d_model(
    data: Union[pd.Series, pd.DataFrame],
    n_input: int,
    n_predict: int,
    learning_rate: float,
    epochs: int,
    batch_size: int,
    test_size: float,
    n_loops: int,
    no_shuffle: bool,
) -> Tuple[pd.DataFrame, np.ndarray, np.ndarray, np.ndarray, Any]:
    """Train a Conv1D model ``n_loops`` times and collect its outputs

    The data is prepared with ``prepare_scale_train_valid_test`` (called with
    an empty end date). For every loop a new model is built, compiled and
    fit; its validation predictions and forecast are stored.

    Parameters
    ----------
    data : Union[pd.Series, pd.DataFrame]
        Data to fit
    n_input : int
        Length of input sequence
    n_predict : int
        Length of output to predict
    learning_rate : float
        Learning rate for optimizer
    epochs : int
        Number of training epochs
    batch_size : int
        Model batch size
    test_size : float
        Fraction of test size
    n_loops : int
        Number of times the model is built and trained
    no_shuffle : bool
        Flag to not shuffle data

    Returns
    -------
    pd.DataFrame
        Forecasts, one column per loop, indexed by the dates returned from
        ``get_next_stock_market_days``
    np.ndarray
        Validation predictions, shaped (n_loops, validation rows, n_predict)
    np.ndarray
        Validation targets
    np.ndarray
        Dates belonging to the validation targets
    Any
        Scaler returned by the data preparation step

    Notes
    -----
    When data preparation flags an error, an empty dataframe, three
    zero-dimensional arrays and ``None`` are returned instead.
    """
    prepared = prepare_scale_train_valid_test(
        data, n_input, n_predict, test_size, "", no_shuffle
    )
    # Three of the returned values are not needed here and are discarded.
    (X_train, X_valid, y_train, y_valid, _, _, _, y_dates_valid,
     forecast_data_input, dates_forecast_input, scaler, is_error) = prepared

    if is_error:
        return pd.DataFrame(), np.array(0), np.array(0), np.array(0), None

    console.print(
        f"Training on {X_train.shape[0]} sequences of length {X_train.shape[1]}.  Using {X_valid.shape[0]} sequences "
        f" of length {X_valid.shape[1]} for validation. Model will run {n_loops} loops"
    )

    future_dates = get_next_stock_market_days(
        dates_forecast_input[-1], n_next_days=n_predict
    )

    # One slice per training loop: validation predictions and forecasts.
    n_valid = X_valid.shape[0]
    preds = np.zeros((n_loops, n_valid, n_predict))
    forecast_data = np.zeros((n_loops, n_predict))

    for run in range(n_loops):
        # Build Neural Network model (a new one for every loop)
        model = build_neural_network_model(
            cfg_nn_models.Convolutional,
            n_input,
            n_predict,
        )
        optimizer = optimizers[cfg_nn_models.Optimizer](
            learning_rate=learning_rate)
        model.compile(optimizer=optimizer, loss=cfg_nn_models.Loss)

        # The network is fed inputs with a trailing axis of length 1.
        train_inputs = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
        valid_inputs = X_valid.reshape(n_valid, X_valid.shape[1], 1)
        model.fit(
            train_inputs,
            y_train,
            epochs=epochs,
            verbose=True,
            batch_size=batch_size,
            validation_data=(valid_inputs, y_valid),
            callbacks=[es],
        )

        valid_output = model.predict(valid_inputs)
        preds[run] = valid_output.reshape(n_valid, n_predict)

        loop_forecast = forecast(forecast_data_input, future_dates, model,
                                 scaler)
        forecast_data[run] = loop_forecast.values.flat

    # Rows are future dates, columns are the individual loops.
    forecast_data_df = pd.DataFrame(forecast_data.T, index=future_dates)
    return forecast_data_df, preds, y_valid, y_dates_valid, scaler
def k_nearest_neighbors(other_args: List[str], s_ticker: str,
                        df_stock: pd.DataFrame):
    """
    Train a KNN model on the "Adj Close" column and display its forecast

    Command-line style options are parsed from ``other_args``, the data is
    prepared with ``prepare_scale_train_valid_test``, and a
    ``KNeighborsRegressor`` is fit on the training split. The forecast is
    clamped to be non-negative, printed, and plotted together with the
    validation predictions.

    Any exception raised while parsing, fitting or plotting is caught and
    its message is printed.

    Parameters
    ----------
    other_args: List[str]
        List of argparse arguments
    s_ticker: str
        Ticker, used in the plot title
    df_stock: pd.DataFrame
        Dataframe of stock prices; the "Adj Close" column is used

    Returns
    -------
    None
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="knn",
        description="""
            K nearest neighbors is a simple algorithm that stores all
            available cases and predict the numerical target based on a similarity measure
            (e.g. distance functions).
        """,
    )

    parser.add_argument(
        "-i",
        "--input",
        action="store",
        dest="n_inputs",
        type=check_positive,
        default=40,
        help="number of days to use as input for prediction.",
    )
    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-j",
        "--jumps",
        action="store",
        dest="n_jumps",
        type=check_positive,
        default=1,
        help="number of jumps in training data.",
    )
    parser.add_argument(
        "-n",
        "--neighbors",
        action="store",
        dest="n_neighbors",
        type=check_positive,
        default=20,
        help="number of neighbors to use on the algorithm.",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select for testing",
    )

    parser.add_argument(
        "-t",
        "--test_size",
        default=0.2,
        dest="valid_split",
        type=float,
        help="Percentage of data to validate in sample",
    )
    parser.add_argument(
        "-p",
        "--pp",
        action="store",
        dest="s_preprocessing",
        default="none",
        choices=["normalization", "standardization", "minmax", "none"],
        help="pre-processing data.",
    )
    # store_false with default=True: the `no_shuffle` attribute is True unless
    # the --no_shuffle flag is given, in which case it becomes False.
    parser.add_argument(
        "--no_shuffle",
        action="store_false",
        dest="no_shuffle",
        default=True,
        help="Specify if shuffling validation inputs.",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, other_args)
        if not ns_parser:
            return

        # Three of the returned values are not needed here and are discarded.
        (
            X_train,
            X_valid,
            y_train,
            y_valid,
            _,
            _,
            _,
            y_dates_valid,
            forecast_data_input,
            dates_forecast_input,
            scaler,
            is_error,
        ) = prepare_scale_train_valid_test(df_stock["Adj Close"], ns_parser)
        if is_error:
            print("Error preparing data")
            return
        print(
            f"Training on {X_train.shape[0]} sequences of length {X_train.shape[1]}.  Using {X_valid.shape[0]} sequences "
            f" of length {X_valid.shape[1]} for validation")
        future_dates = get_next_stock_market_days(dates_forecast_input[-1],
                                                  n_next_days=ns_parser.n_days)

        # Machine Learning model; the regressor is fed 2-D arrays, so each
        # split is reshaped to (rows, columns) first.
        knn = neighbors.KNeighborsRegressor(n_neighbors=ns_parser.n_neighbors)
        knn.fit(
            X_train.reshape(X_train.shape[0], X_train.shape[1]),
            y_train.reshape(y_train.shape[0], y_train.shape[1]),
        )

        preds = knn.predict(X_valid.reshape(X_valid.shape[0],
                                            X_valid.shape[1]))
        forecast_data = knn.predict(forecast_data_input.reshape(1, -1))
        # NOTE(review): the forecast is not passed through the scaler here,
        # while the printed comparison uses the raw last "Adj Close" value;
        # confirm this is intended when a pre-processing option other than
        # "none" is selected.

        # Clamp the forecast at zero element by element. Every row stays a
        # list of floats, so the frame is built from uniform data; swapping a
        # whole row for a bare scalar would mix arrays and scalars in one
        # list, which the DataFrame constructor cannot turn into rows.
        forecast_data_df = pd.DataFrame(
            [[value if value > 0 else 0.0 for value in row]
             for row in forecast_data.T],
            index=future_dates,
        )
        print_pretty_prediction(forecast_data_df[0],
                                df_stock["Adj Close"].values[-1])
        plot_data_predictions(
            df_stock,
            preds,
            y_valid,
            y_dates_valid,
            scaler,
            f"KNN Model with {ns_parser.n_neighbors} Neighbors on {s_ticker}",
            forecast_data_df,
            1,
        )
        print("")

    except Exception as e:
        # Top-level command boundary: report the failure instead of raising.
        print(e)
        print("")