Ejemplo n.º 1
0
 def get_score_for_clf(self, clf, df_test, feature_name, pca=None):
     """Score ``clf`` on ``df_test``, optionally PCA-reducing the 'gdf' columns.

     The feature columns are looked up in ``self.feature_columns_dict`` under
     ``feature_name``. When ``pca`` is given, every selected column whose name
     contains ``'gdf'`` is passed through ``pca.transform`` and replaced by
     columns ``pca_0``, ``pca_1``, ... holding the transformed components;
     the remaining columns are kept as they are. When ``pca`` is ``None`` the
     selected columns are used unchanged.

     Args:
         clf: Fitted classifier, forwarded to ``model.test_model``.
         df_test: Test data frame; must contain the selected feature columns
             and a ``'mid_price_indicator'`` column.
         feature_name: Key into ``self.feature_columns_dict``.
         pca: Optional fitted transformer exposing ``transform``.
             NOTE(review): assumed to be fitted on exactly the 'gdf' columns
             in this order -- confirm against the training code.

     Returns:
         Whatever ``model.test_model(clf, x_test, y_test)`` returns.
     """
     x_test = df_test[self.feature_columns_dict[feature_name]]
     if pca is not None:
         gdf_columns = [col for col in x_test.columns if 'gdf' in col]
         other_columns = [col for col in x_test.columns if 'gdf' not in col]
         components = pca.transform(x_test[gdf_columns])
         # Work on an explicit copy: adding columns to a slice of df_test
         # is chained assignment and triggers SettingWithCopyWarning.
         x_test = x_test[other_columns].copy()
         # Use the width of the transformed array rather than
         # pca.n_components, which may be None, a float or 'mle'.
         for n in range(components.shape[1]):
             x_test['pca_{}'.format(n)] = components[:, n]
     y_test = df_test['mid_price_indicator'].values
     return model.test_model(clf, x_test, y_test)
Ejemplo n.º 2
0
 def get_score_for_clf(self, clf, df_test, feature_name, pca=None):
     """Score ``clf`` on the test frame, applying ``pca`` first when given.

     Args:
         clf: Classifier forwarded to ``model.test_model``.
         df_test: Test data frame holding the feature columns and the
             ``'mid_price_indicator'`` label column.
         feature_name: Key into ``self.feature_columns_dict`` selecting the
             feature columns.
         pca: Optional transformer; when truthy, its ``transform`` is applied
             to the selected features before scoring.

     Returns:
         The result of ``model.test_model`` for the prepared features/labels.
     """
     selected_columns = self.feature_columns_dict[feature_name]
     features = df_test[selected_columns]
     features = pca.transform(features) if pca else features
     labels = df_test['mid_price_indicator'].values
     return model.test_model(clf, features, labels)
Ejemplo n.º 3
0
    def train_lstm(self,
                   clf,
                   feature_name='',
                   should_validate=True,
                   method=None,
                   fit_kwargs=None,
                   compile_kwargs=None,
                   n_steps=None,
                   plot_name=None,
                   class_weight=None,
                   should_return_model=False):
        """Train an LSTM model on the selected features and score it on the test set.

        Args:
            clf: Model factory. It is handed to ``model.validate_model_lstm``
                when validating, otherwise called with no arguments to build
                the model that ``model.train_model`` fits.
            feature_name: Key into ``self.feature_columns_dict``; also passed
                to ``self.get_pca``.
            should_validate: If true, use ``model.validate_model_lstm`` and
                average its scores with ``self.get_mean_scores``; otherwise
                train once with ``model.train_model``.
            method: Value reported under ``'kernel'``; ``'lstm'`` when falsy.
            fit_kwargs: Forwarded to the training/validation helper.
            compile_kwargs: Forwarded to the training/validation helper.
            n_steps: When truthy, inputs are windowed with
                ``self.split_sequences``; otherwise every sample is reshaped
                to ``(samples, 1, features)``.
            plot_name: Forwarded to ``model.validate_model_lstm`` only.
            class_weight: Forwarded to the training/validation helper.
            should_return_model: If true, return ``(result, model)`` instead
                of just ``result``.

        Returns:
            A dict merging the training scores, ``'stock'``, ``'kernel'``,
            ``'features'``, ``'pca_components'``, the test scores and
            ``'arch'`` (the model's ``to_json()``), optionally paired with
            the trained model.
        """
        logger.info('Training %s r=%s s=%s: clf=%s', self.stock, self.r,
                    self.s, clf)

        columns = self.feature_columns_dict[feature_name]
        train_x = self.df[columns].values
        test_x = self.df_test[columns].values
        train_y = self.df['mid_price_indicator'].values
        test_y = self.df_test['mid_price_indicator'].values

        pca = self.get_pca(feature_name)
        if pca:
            train_x, test_x = pca.transform(train_x), pca.transform(test_x)

        if n_steps:
            train_x, train_y = self.split_sequences(
                train_x, train_y, n_steps=n_steps)
            test_x, test_y = self.split_sequences(
                test_x, test_y, n_steps=n_steps)
        else:
            # No windowing requested: insert a middle axis of length 1.
            train_shape = (train_x.shape[0], 1, train_x.shape[1])
            test_shape = (test_x.shape[0], 1, test_x.shape[1])
            train_x = np.reshape(train_x, train_shape)
            test_x = np.reshape(test_x, test_shape)

        if should_validate:
            scores_arrays, net = model.validate_model_lstm(
                clf,
                train_x,
                train_y,
                fit_kwargs=fit_kwargs,
                compile_kwargs=compile_kwargs,
                plot_name=plot_name,
                class_weight=class_weight,
                print_debug=False)
            scores = self.get_mean_scores(scores_arrays)
        else:
            net = clf()
            scores = model.train_model(
                net,
                train_x,
                train_y,
                compile_kwargs=compile_kwargs,
                fit_kwargs=fit_kwargs,
                is_lstm=True,
                class_weight=class_weight)

        res = {
            **scores,
            'stock': self.stock,
            'kernel': method or 'lstm',
            'features': feature_name,
            'pca_components': pca.n_components_ if pca else None,
        }
        test_scores = model.test_model(net, test_x, test_y, is_lstm=True)
        summary = {**res, **test_scores}
        logger.info('Finished training %s %s', self.stock, summary)

        # Build a fresh dict so the one handed to the logger is not mutated.
        result = {**summary, 'arch': net.to_json()}
        if should_return_model:
            return result, net
        return result
Ejemplo n.º 4
0
def get_score_for_clf(clf, df_test):
    """Score ``clf`` on ``df_test`` using only the ``'queue_imbalance'`` column.

    Args:
        clf: Classifier forwarded to ``model.test_model``.
        df_test: Test data frame with ``'queue_imbalance'`` and
            ``'mid_price_indicator'`` columns.

    Returns:
        The result of ``model.test_model`` for these features and labels.
    """
    feature_columns = ['queue_imbalance']
    features = df_test[feature_columns]
    labels = df_test['mid_price_indicator'].values
    return model.test_model(clf, features, labels)
Ejemplo n.º 5
0
 def get_score_for_clf(self, clf, df_test: pd.DataFrame,
                       feature_name: str) -> dict:
     """Score ``clf`` on the test frame for one named feature set.

     Args:
         clf: Classifier forwarded to ``model.test_model``.
         df_test: Test data frame holding the feature columns and the
             ``'mid_price_indicator'`` label column.
         feature_name: Key into ``self.feature_columns_dict`` selecting the
             feature columns.

     Returns:
         The scores produced by ``model.test_model``.
     """
     selected_columns = self.feature_columns_dict[feature_name]
     features = df_test[selected_columns]
     labels = df_test['mid_price_indicator'].values
     return model.test_model(clf, features, labels)