def get_train_data(self, fname, asset_class, cols):
        """Function to obtain the feature matrix and the direction labels.

        fname : forwarded to create_features to locate the data.
        asset_class : forwarded to create_features to select the instrument.
        cols : labels of the feature columns to keep.

        Returns the pair (features restricted to cols, 'return_sign' column),
        both taken from the rows whose 'return_sign' is non-zero.
        """
        features = create_features(fname, asset_class)

        # Zero returns would form a third class, so keep only the up/down rows.
        has_direction = features['return_sign'] != 0.0
        directional = features[has_direction]

        return directional.loc[:, cols], directional['return_sign']
    def pnl_backtesting(self, fname, asset_class, cols):
        """Perform PnL backtesting on held-out data to check the model's predictions against realised returns.

        The last 30% of the observations (in order, no shuffling) are used as the
        test set and the model is fitted on the preceding 70%.

        fname : forwarded to create_features to locate the data.
        asset_class : forwarded to create_features to select the instrument.
        cols : labels of the feature columns used by the model.

        Returns a DataFrame with the columns 'Prob-Down', 'Prob-Up',
        'Predicted-Move', 'Real-Move' and 'Daily PnL' for the test period.
        """
        X, y = self.get_train_data(fname, asset_class, cols)

        # Split the data into test and training data (keeping the original order)
        # and train the model on the training data. The last observation is left out.
        X_train, X_test, y_train, y_test = train_test_split(X[:-1], y[:-1], test_size=0.3, shuffle=False)
        # Presumably enables probability estimates on the underlying estimator
        # (as required by e.g. scikit-learn's SVC) -- TODO confirm for other classifiers.
        self.probability = True
        self.fit(X_train, y_train)

        # Evaluate the class probabilities once and slice both columns from the result.
        # Assumes column 0 is the down (-1) class and column 1 the up (+1) class.
        probabilities = self.predict_proba(X_test)
        probability_down = probabilities[:, 0]
        probability_up = probabilities[:, 1]
        predicted_direction = self.predict(X_test)

        # Realised returns for the test period: drop the last observation, remove
        # zero returns (to avoid multi-class classification) and keep the trailing
        # rows that correspond to the test data.
        df0 = create_features(fname, asset_class)[:-1]
        df0 = df0[df0['return_sign'] != 0.0]
        df0 = df0[-X_test.shape[0]:]
        true_realised_return = df0['return']

        # The position size is the absolute probability edge; the sign of the
        # position comes from predicted_direction.
        kelly_optimal_fraction = abs(probability_up - probability_down)
        realised_daily_profit = np.multiply(np.multiply(true_realised_return, predicted_direction), kelly_optimal_fraction)

        df = pd.DataFrame({'Prob-Down':probability_down, 'Prob-Up':probability_up, 'Predicted-Move':predicted_direction, 'Real-Move':y_test, 'Daily PnL':realised_daily_profit})

        return df
# Example #3
    def get_feature_scatter_plt(self, fname, asset_class, cols, ax):
        """Function to plot scatters with emphasis on highlighting points from different classes.

        fname : forwarded to create_features to locate the data.
        asset_class : forwarded to create_features to select the instrument.
        cols : not used by the code below.
        ax : object on which the title is set (presumably a matplotlib Axes -- TODO confirm).
        """
        df = create_features(fname, asset_class)
        df = df[
            df['return_sign'] !=
            0.0]  # Remove any zero returns to avoid multi-class classification.

        # Separate points into up/down moves.
        # NOTE(review): neither subset is drawn here; only the title is set.
        up_moves = df[df['return_sign'] == 1.0]
        down_moves = df[df['return_sign'] == -1.0]
        ax.set_title('Decision Boundary')
    def get_train_data(self, fname, asset_class, cols):
        """Function to obtain training data (or training + test) from a dataframe.

        fname : name of the file under which the data is stored in the system's path.
        asset_class : name of the financial instrument to consider as contained in the file given by fname.
        cols : labels of the feature columns to keep.

        Returns the pair (features restricted to cols, 'return_sign' column),
        both taken from the rows whose 'return_sign' is non-zero.
        """
        df = create_features(fname, asset_class)
        df = df[df['return_sign'] != 0.0]  # Remove any zero returns to avoid multi-class classification.
        X_train = df.loc[:, cols]
        y_classifier = df['return_sign']

        return X_train, y_classifier
    def get_total_up_down_moves(self, fname, asset_class, cols):
        """Function to compute and print the overall number of realised up/down moves.

        fname : forwarded to create_features to locate the data.
        asset_class : forwarded to create_features to select the instrument.
        cols : not used by the code below.

        Nothing is returned; the two counts are written to standard output.
        """
        df = create_features(fname, asset_class)
        df = df[df['return_sign'] != 0.0]  # Remove any zero returns to avoid multi-class classification.

        # Separate points into up/down moves and report how many rows fall in each.
        up_moves = df[df['return_sign'] == 1.0]
        down_moves = df[df['return_sign'] == -1.0]
        print('real down moves: ', down_moves.shape[0])
        print('real up moves: ', up_moves.shape[0])
    def pnl_backtesting(self, fname, asset_class, cols):
        """Perform PnL backtesting to check how accurate the path predicted by the model would have been given previous price returns.

        The model is fitted on, and evaluated against, the same full data set
        (in-sample backtest).

        fname : forwarded to create_features to locate the data.
        asset_class : forwarded to create_features to select the instrument.
        cols : labels of the feature columns used by the model.

        Returns a DataFrame with the columns 'Prob-Down', 'Prob-Up',
        'Predic-Move', 'Real-Move' and 'Daily PnL'.
        """
        X, y = self.get_train_data(fname, asset_class, cols)
        self.fit(X, y)

        # Evaluate the class probabilities once and slice both columns from the result.
        # Assumes column 0 is the down (-1) class and column 1 the up (+1) class.
        probabilities = self.predict_proba(X)
        probability_down = probabilities[:, 0]
        probability_up = probabilities[:, 1]
        predicted_direction = self.predict(X)

        df0 = create_features(fname, asset_class)
        df0 = df0[df0['return_sign'] != 0.0]  # Remove any zero returns to avoid multi-class classification.
        true_realised_return = df0['return']

        # The position size must be non-negative: the sign of the position is
        # already carried by predicted_direction, and multiplying it by a signed
        # probability edge would cancel the direction of every short position.
        kelly_optimal_fraction = abs(probability_up - probability_down)
        realised_daily_profit = np.multiply(np.multiply(true_realised_return, predicted_direction), kelly_optimal_fraction)

        df = pd.DataFrame({'Prob-Down':probability_down, 'Prob-Up':probability_up, 'Predic-Move':predicted_direction, 'Real-Move':y, 'Daily PnL':realised_daily_profit})

        return df
    def get_feature_scatter_plt(self, fname, asset_class, cols, ax):
        """Function to plot a simple scatter for two features of the data set, while separating up/down movements.

        fname : forwarded to create_features to locate the data.
        asset_class : forwarded to create_features to select the instrument.
        cols : sequence whose first two entries name the features plotted on the x and y axes.
        ax : object providing scatter() and set_title() (presumably a matplotlib Axes -- TODO confirm).

        Up moves are drawn in green and down moves in red. Labels are attached to
        both series, but no legend is drawn here.
        """
        df = create_features(fname, asset_class)
        df.dropna(inplace=True)  # Remove any null values.
        df = df[df['return_sign'] != 0.0]  # Remove any zero returns to avoid multi-class classification.

        # Separate points into up/down moves.
        up_moves = df[df['return_sign'] == 1.0]
        down_moves = df[df['return_sign'] == -1.0]
        ax.scatter(up_moves[cols[0]], up_moves[cols[1]], color='g', s=1, alpha=1, label='Up')
        ax.scatter(down_moves[cols[0]], down_moves[cols[1]], color='r', s=1, alpha=1, label='Down')
        ax.set_title('2D Representation for 2 Features')
# Example #8
# Driver script: backtest several classifiers on one feature combination and
# plot their cumulative predicted PnL for comparison.
# fname, asset_class, all_cols and powerset must already be defined.
metric = ['manhattan', 'euclidean', 'mahalanobis']

# Determine which combination of features to investigate; select one from all the possible combinations in the powerset.
feature_combination = powerset(all_cols)[205]

# Create KNNClassifier object; the best parameters to use depend on the output of the GridSearchCV class methods.
#knn_object = KNNClassifier(n_neighbors=10, weights='uniform', metric=metric[1], algorithm='brute')
#df1 = knn_object.pnl_backtesting(fname, asset_class, feature_combination)

# Create LogisticRegressionClassifier object. A high C corresponds to no regularization.
# NOTE(review): the original call was cut off after `C=50,`; any further
# keyword arguments are unknown and have to be restored by hand.
logit_object = LogisticRegressionClassifier(C=50)
df2 = logit_object.pnl_backtesting(fname, asset_class, feature_combination)

# Create SVM object. High C means hard margins, and hence takes longer to solve the optimization problem.
svm_object = SVMClassifier(C=1, kernel='linear', cache_size=1000)
df3 = svm_object.pnl_backtesting(fname, asset_class, feature_combination)

# Dataframe carrying all info about features, especially the actual realised return, that will be compared with the predicted return.
# Copy the trailing slice so the column assignments below write to an
# independent frame rather than to a view of the full feature table.
df = create_features(fname, asset_class)
df = df[-df2.shape[0]:].copy()

# Put all predicted PnL for each classifier in the one dataframe df, and then plot all to compare.
#df['pred_ret_knn'] = df1['Daily PnL']
df['pred_ret_svm'] = df3['Daily PnL']
df['pred_ret_logit'] = df2['Daily PnL']

# NOTE(review): the start of this column selection was lost; only its last
# entry ('pred_ret_logit') survived. The realised 'return' column may have been
# part of the original selection -- confirm before relying on the plot.
df[['pred_ret_svm',
    'pred_ret_logit']].cumsum().apply(np.exp).plot(figsize=(15, 10))