Example #1
 def predict(self, X_test, pred_index=None):
     # Linear prediction: weighted combination of the input features
     predicted_values = X_test * self.weights
     # For a classification output, map the raw values to class labels
     if self.global_hyperparams['output_type'] == 'C':
         predicted_values = to_class(predicted_values,
                                     self.global_hyperparams['threshold'])
     # Optionally store the prediction under its timestamp
     if pred_index is not None:
         self._store_predicted_values(pred_index, predicted_values)
     return predicted_values
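These examples all rely on a to_class helper that maps raw regression outputs to class labels using the absolute threshold from global_hyperparams. Its implementation is not part of this listing; a minimal sketch, assuming the class convention described in Example #7 (1 for a positive return, -1 for a negative one, 0 for a return below the threshold in absolute value), might look like:

import numpy as np
import pandas as pd

def to_class(values, threshold=0.0):
    # Hypothetical sketch of the to_class helper used throughout these examples;
    # the actual MSTA implementation may differ.
    arr = np.asarray(values, dtype=float)
    classes = np.where(arr > threshold, 1, np.where(arr < -threshold, -1, 0))
    if isinstance(values, pd.Series):
        # Preserve the pandas index so downstream alignment keeps working
        return pd.Series(classes, index=values.index, name=values.name)
    return classes

With threshold=0 this essentially degenerates into a binary +1/-1 signal, matching the binary-classification case mentioned in Example #7.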
Example #2
 def __init__(self, global_hyperparams, pred_val, asset_data, threshold=None):
     self.pred_val = pred_val
     self.output_type = global_hyperparams['output_type']
     # Fall back to the global threshold when no explicit one is given
     self.threshold = (threshold if threshold is not None
                       else global_hyperparams['threshold'])
     # If the output is already a classification keep it as is; otherwise map
     # the raw values to classes using the resolved threshold (not the raw
     # argument, which may be None)
     self.pred_val_class = (pred_val if self.output_type == 'C'
                            else to_class(pred_val, self.threshold))
     self.pred_val_class = self.pred_val_class.squeeze()
     # Align the asset data with the prediction index
     self.asset_data = asset_data[self.pred_val_class.index]
Example #3
File: generic_algo.py  Project: ifzz/MSTA
 def predict(self, X_test, pred_index=None):
     """ Predict function used in main and in the cross-validation process.
     It accepts either an array or a DataFrame as X_test and gives a corresponding output.
     This version of the function only works for ML algorithms; it has to be recoded for TA algorithms.
     If a pred_index is provided, the prediction is stored in predicted_values under this index
     (a sketch of this storage helper follows this example).
     """
     if self.algo_type == "ML":
         predicted_values = self.model.predict(X_test)
         if self.global_hyperparams["output_type"] == "C" and self.model._estimator_type != 'classifier':
             # If we use a regression model and we still need to output a class
             predicted_values = to_class(predicted_values, self.global_hyperparams["threshold"])
     else:
         predicted_values = np.nan  # Not integrated yet
     if pred_index is not None:
         self._store_predicted_values(pred_index, predicted_values)
     return predicted_values
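The docstring above mentions that predictions are stored in predicted_values under pred_index via _store_predicted_values, whose body is not shown in this listing. A minimal sketch of what such a storage method could look like (an assumption, not the project's actual code; it presumes numpy is imported as np in the module, as in the snippet above):

 def _store_predicted_values(self, pred_index, predicted_values):
     # Hypothetical sketch: keep one entry per prediction timestamp so that a
     # full out-of-sample series can be rebuilt after the walk-forward loop
     if not hasattr(self, 'predicted_values'):
         self.predicted_values = {}
     values = np.atleast_1d(predicted_values)  # handles scalars, arrays and Series alike
     for idx, value in zip(pred_index, values):
         self.predicted_values[idx] = value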
Example #4
    def predict(self, X_test, pred_index=None):
        w = self.window_size
        if self.mean_type == "arithmetic":
            predicted_values = X_test.iloc[:, :w].mean(axis=1, skipna=True)
        elif self.mean_type == "geometric":
            # Note that the geometric mean could be computed with vectorized
            # numpy operations instead of this loop (see the sketch after this example)
            predicted_values = 1
            for col in X_test.iloc[:, :w].columns:
                predicted_values = predicted_values * (1 + X_test[col])
            predicted_values = np.power(predicted_values, 1 / w) - 1

        # The output differs between regression and classification; nothing needs
        # to change for a regression
        if self.global_hyperparams["output_type"] == "C":
            threshold = self.global_hyperparams["threshold"]
            predicted_values = to_class(predicted_values, threshold)

        if pred_index is not None:
            self._store_predicted_values(pred_index, predicted_values)
        return predicted_values  # the return is redundant with the stored side effect; this is kept to simplify the calling code
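As the comment in the geometric branch notes, the per-column Python loop can be replaced by vectorized operations. A sketch of an equivalent vectorized computation (same formula: the w-period geometric mean of 1 + return, minus 1), assuming the first w columns of X_test hold the lagged returns:

import numpy as np

def geometric_mean_returns(X_test, w):
    # Vectorized equivalent of the loop above: compound (1 + r) across the
    # first w lag columns, take the w-th root, subtract 1
    window = X_test.iloc[:, :w]
    return np.power((1.0 + window).prod(axis=1), 1.0 / w) - 1.0

Like the arithmetic branch, this returns a pandas Series aligned with X_test's index.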
Example #5
    def predict(self, X_test, pred_index=None):
        w = self.window_size
        if self.mean_type == 'arithmetic':
            predicted_values = X_test.iloc[:, :w].mean(axis=1, skipna=True)
        elif self.mean_type == 'geometric':
            # Note that the geometric mean could be computed with vectorized
            # numpy operations instead of this loop
            predicted_values = 1
            for col in X_test.iloc[:, :w].columns:  # We stop at column number w
                predicted_values = predicted_values * (1 + X_test[col].values)
            predicted_values = np.power(predicted_values, 1 / w) - 1

        # We classify the predictions in case of a classification output
        if self.global_hyperparams['output_type'] == 'C':
            threshold = self.global_hyperparams['threshold']
            predicted_values = to_class(predicted_values, threshold)

        if pred_index is not None:
            self._store_predicted_values(pred_index, predicted_values)
        return predicted_values
Example #6
File: __main__.py  Project: zfy1989lee/MSTA
dataset = data.dataset_building('quandl', asset_ids, start_date, end_date, n_max=None)  # please recode the dataset_building function to make it support both local and quandl data
dataset = data.add_returns(dataset, [0])  # the returns computation creates some NaNs
dataset.dropna(inplace=True)

# We select an asset returns time series to predict from the dataset
Y_0 = dataset[dataset.columns[1]]  # TODO: find a reliable way to locate the index of this column

# X: include all the lags of Y and additional data
lags=range(1,rolling_window_size+1)
X = data.lagged(dataset, lags=lags)  # In X, always include all the lags of Y that you want to use for the HM as the first columns (a sketch of such a lagged helper follows this example)
#max_lags=max(lags)
# We could also turn X into classes data, is that meaningful?
# X=to_class(X,threshold)    

# In case of classification we classify Y; otherwise we keep the raw returns
Y = data.to_class(Y_0, threshold) if output_type == 'C' else Y_0
    

## Creating & calibrating the different algorithms

# First define a dictionary of algorithms keyed by their names
# As arguments, include the fixed hyperparameters of the model as named arguments
# For the hyperparameter grid to use in cross validation, provide a dictionary using sklearn syntax
algos={'HM':HM(global_hyperparams, hp_grid={'window_size':[10,100,500]}),
       #'LR':LR(global_hyperparams),
       #'Lasso':LR(global_hyperparams, regularization='Lasso',hp_grid={'alpha':np.logspace(-4,1,10)}),
       #'ElasticNet':LR(global_hyperparams, regularization='ElasticNet',hp_grid={'alpha':np.logspace(-3,1,20),'l1_ratio':np.linspace(0,1,20)}),
       #'Tree':DT(global_hyperparams,hp_grid={'max_features':['sqrt',None],'criterion':['gini','entropy']}),
       #'RF':RF(global_hyperparams, hp_grid={'max_features':['sqrt',None],'n_estimators':range(10,200,20)}),
       #'ADAB':ADAB(global_hyperparams, hp_grid={'n_estimators':[1,5,10]}, base_algo=DT(global_hyperparams)),
       #'MLP':MLP(global_hyperparams,hp_grid={'alpha':np.linspace(0.1,1,9),'hidden_layer_sizes':[(10,),(100,),(200,)]},activation='relu', solver='lbfgs'),
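Both this example and Example #7 build the feature matrix with data.lagged, which is expected to place all the lags of Y in the first columns. Its implementation is not included in this listing; a minimal sketch of such a lag-building helper (an assumption, not the project's actual code):

import pandas as pd

def lagged(df, lags):
    # Hypothetical sketch of data.lagged: for each lag k, row t of the k-th
    # block holds the value observed at t-k; the shifted copies are then
    # concatenated column-wise, leaving NaNs at the start for the caller to drop
    df = pd.DataFrame(df)  # accept a Series or a DataFrame
    frames = [df.shift(k).add_suffix('_lag{}'.format(k)) for k in lags]
    return pd.concat(frames, axis=1)

With lags=range(1, rolling_window_size+1) this yields one column per lag and per input column, and the leading rows without a full history are removed by the dropna calls in the examples.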
Example #7
File: main.py  Project: ifzz/MSTA
def __main__():
    ## Global Hyperparameters
    # The window size of the rolling window used to define each training set size
    # The models will never see more than this number of points at once
    rolling_window_size = 500

    # Output type : C for Classification, R for Regression
    # Note that for a Classification, 1 means a positive return, -1 a negative one, and 0 a return below the threshold
    output_type = "C"
    # In case of 3 class Classification, please provide an absolute level for the zero return threshold
    # Fix it to 0 for a binary classification
    # The optimal value can also be globally optimized as a result of the PnL optimisation and will be a function of the volatility of the asset
    threshold = 0.001

    # This dictionary of global hyperparameters will be passed as an argument to all built algorithms
    global_hyperparams = {
        "rolling_window_size": rolling_window_size,
        "output_type": output_type,
        "threshold": threshold
    }

    ## Building the dataset
    dataset = data.dataset_building(n_max=2000)

    # We select an asset returns time series to predict from the dataset
    asset_label = "EURUSD Curncy"
    Y = dataset[[asset_label]]
    Y.dropna(inplace=True)

    # The lags of Y are used as X; this implementation may not be optimal (consider a slicing-based way to build it)
    lags = range(1, rolling_window_size + 1)
    X = data.lagged(Y, lags=lags)
    max_lags = max(lags)
    # We could also turn X into classes data, is that meaningful?
    # X=to_class(X,threshold)

    # In case of classification, we transform Y and put the classes labels into global_hyperparams
    if output_type == "C":
        Y = data.to_class(Y, threshold)  # Note that the values of Y are now the indices of the classes in classes
        classes = np.unique(Y)
        global_hyperparams["classes"] = classes

    ## Creating & calibrating the different algorithms

    # First define a dictionary of algorithms keyed by their names
    # As arguments, include the fixed hyperparameters of the model as named arguments
    # For the hyperparameter grid to use in cross validation, provide a dictionary using sklearn syntax
    algos = {
        "HM AR Full window": HM(global_hyperparams, window_size=10,
                                hp_grid={'window_size': [1, 100]}),
        "HM GEO Full window": HM(global_hyperparams, mean_type="geometric",
                                 hp_grid={'window_size': [1, 10, 50, 100]}),
        "HM AR Short Term": HM(global_hyperparams, window_size=10),
        "LR": LR(global_hyperparams),
        "Lasso": LR(global_hyperparams, regularization="Lasso",
                    hp_grid={"alpha": np.logspace(-4, 1, 5)})
    }

    # Then we allow ourselves to work/calib/fit/train on only a subset of these algos
    #algos_used=algos.keys()
    algos_used = ["Lasso"]
    #algos_used=["HM AR Full window"]
    #algos_used=["HM GEO Full window"]

    for key in algos_used:
        # We let each algo select the relevant data to work on
        algos[key].select_data(X)

        for i in range(rolling_window_size + max_lags, len(Y.index)):
            # Note that i is the numeric index in Y of the predicted value
            train = range(i - rolling_window_size, i)  # i.e. rows i-rolling_window_size to i-1
            test = [i]  # wrapped in a list so the slicing produces a DataFrame
            pred_index = Y.index[test]  # This is the timestamp of row i

            # We train all the algos on the training set; this includes the calibration of hyperparameters and the fitting
            algos[key].calib(X.iloc[train],
                             Y.iloc[train],
                             pred_index,
                             cross_val_type="ts_cv",
                             n_splits=5,
                             calib_type="GridSearch")

            # We build the predictions
            algos[key].predict(X.iloc[test], pred_index)

            # for debug
            print(i)

        # We compute the outputs
        algos[key].compute_outputs(Y)

        # for debug
        print(algos[key].best_hp)

    ## Core algorithm
    # Hyperparameters of the Core algorithm
    rolling_window_size_core = rolling_window_size
    core_algo = HM(global_hyperparams,
                   window_size=rolling_window_size_core)  # Average of the predictions

    # We first build a new dataset with all the predictions of the algos; it will be our new X (a sketch of such an assembly step follows this example)
    X_core = data.core_dataset(algos, algos_used)

    ## Trading Strategy

    ## Backtest/Plots/Trading Execution

    return 0
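Example #7 assembles X_core with data.core_dataset(algos, algos_used), a new feature matrix whose columns are the out-of-sample predictions of the individual algos and which the core HM algorithm then averages. Assuming each algo exposes the predictions stored through _store_predicted_values as a mapping from timestamp to prediction (the attribute name predicted_values is an assumption here), a minimal sketch of that assembly step could be:

import pandas as pd

def core_dataset(algos, algos_used):
    # Hypothetical sketch of data.core_dataset; the actual MSTA helper may differ.
    # One column per algo, indexed by prediction timestamp, so the core algo can
    # be trained on the individual algos' out-of-sample predictions
    columns = {}
    for key in algos_used:
        stored = algos[key].predicted_values  # assumed mapping: timestamp -> prediction
        columns[key] = pd.Series(stored)
    return pd.DataFrame(columns)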