def test_regressor_deprecated(self):
    """Constructing a KerasRegressor emits exactly one DeprecationWarning."""
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        scikit_learn.KerasRegressor(build_fn_reg)
        assert len(caught) == 1
        last_warning = caught[-1]
        assert issubclass(last_warning.category, DeprecationWarning)
        assert 'KerasRegressor is deprecated' in str(last_warning.message)
def test_regression_build_fn(self):
    """A KerasRegressor built from a plain function can fit a regression."""
    with self.cached_session():
        estimator = scikit_learn.KerasRegressor(
            build_fn=build_fn_reg,
            hidden_dim=HIDDEN_DIM,
            batch_size=BATCH_SIZE,
            epochs=EPOCHS)
        assert_regression_works(estimator)
def evaluate_model(model, X, Y):
    """Cross-validate a Keras regression model and print its baseline MSE.

    Wraps *model* in a KerasRegressor, scores it with 10-fold
    cross-validation on (X, Y), and prints mean/std of the results.
    """
    regressor = kw_learn.KerasRegressor(
        build_fn=model,
        epochs=100,
        batch_size=5,
        verbose=0)
    folds = model_sel.KFold(n_splits=10)
    scores = model_sel.cross_val_score(regressor, X, Y, cv=folds)
    print("Baseline: %.2f (%.2f) MSE" % (scores.mean(), scores.std()))
def test_regression_class_build_fn(self):
    """A KerasRegressor accepts a callable object as its build_fn."""

    class ClassBuildFnReg(object):
        # callable wrapper delegating to the module-level builder

        def __call__(self, hidden_dim):
            return build_fn_reg(hidden_dim)

    with self.cached_session():
        estimator = scikit_learn.KerasRegressor(
            build_fn=ClassBuildFnReg(),
            hidden_dim=HIDDEN_DIM,
            batch_size=BATCH_SIZE,
            epochs=EPOCHS)
        assert_regression_works(estimator)
def create_column_dnn(
        predict_feature='close',
        ticker='',
        debug=False,
        use_epochs=10,
        use_batch_size=10,
        use_test_size=0.1,
        use_random_state=1,
        use_seed=7,
        use_shuffle=False,
        model_verbose=True,
        fit_verbose=True,
        use_scalers=True,
        df=None,
        dnn_config=None,
        compile_config=None,
        s3_bucket='',
        s3_key='',
        send_plots_to_slack=False):
    """create_column_dnn

    For scaler-normalized datasets this will compile numeric
    columns and ignore string/non-numeric columns as training
    and test feature columns

    :param predict_feature: Column to create DNN with
    :param ticker: Ticker being used
    :param debug: Debug mode
    :param use_epochs: Epochs times to use
    :param use_batch_size: Batch size to use
    :param use_test_size: Test size to use
    :param use_random_state: Random state to train with
    :param use_seed: Seed used to build scalar datasets
    :param use_shuffle: To shuffle the regression estimator or not
    :param model_verbose: To use a verbose Keras regression model or not
    :param fit_verbose: To use a verbose fitting of the regression
        estimator
    :param use_scalers: To build using scalars or not
    :param df: Ticker dataset
    :param dnn_config: Deep Neural Net keras model json to build the model
    :param compile_config: Deep Neural Net dictionary of compile options
    :param s3_bucket: S3 Bucket
    :param s3_key: S3 Key
    :param send_plots_to_slack: To send the generated plots to slack or not
    """
    # fix: mutable default arguments ([], {}) are shared across calls,
    # and dnn_config is mutated below (last layer activation) - use
    # None sentinels so each call gets a fresh object
    if df is None:
        df = []
    if dnn_config is None:
        dnn_config = {}
    if compile_config is None:
        compile_config = {}

    # keep only rows where the predicted feature has a usable value
    df_filter = (df[f'{predict_feature}'] >= 0.1)
    first_date = df[df_filter]['date'].iloc[0]
    end_date = df[df_filter]['date'].iloc[-1]
    if 'minute' in df:
        found_valid_minute = df['minute'].iloc[0]
        if found_valid_minute:
            # intraday dataset - report minute timestamps instead of dates
            first_date = df[df_filter]['minute'].iloc[0]
            end_date = df[df_filter]['minute'].iloc[-1]

    num_rows = len(df.index)
    log.info(
        f'prepared training data from '
        f'history {s3_bucket}@{s3_key} '
        f'rows={num_rows} '
        f'dates: {first_date} to {end_date}')

    if debug:
        for i, r in df.iterrows():
            log.info(f'{r["minute"]} - {r["{}".format(predict_feature)]}')
        # end of for loop
        log.info(f'columns: {df.columns.values}')
        log.info(f'rows: {len(df.index)}')
    # end of debug

    use_all_features = use_scalers
    all_features = []
    train_features = []
    if use_all_features:
        # train on every numeric column except the one being predicted
        for c in df.columns.values:
            if (pandas_types.is_numeric_dtype(df[c]) and
                    c not in train_features):
                if c != predict_feature:
                    train_features.append(c)
                if c not in all_features:
                    all_features.append(c)
        dnn_config['layers'][-1]['activation'] = 'sigmoid'
    else:
        temp_choices = choices[:]
        temp_choices.remove(predict_feature)
        train_features = ['open']
        train_features.extend(temp_choices)
        all_features = [f'{predict_feature}'] + train_features

    num_features = len(train_features)
    features_and_minute = ['minute'] + all_features

    log.info('converting columns to floats')

    # -10000.0 is a sentinel for missing values before the float cast
    timeseries_df = df[df_filter][features_and_minute].fillna(-10000.0)
    converted_df = timeseries_df[all_features].astype('float32')

    train_df = None
    test_df = None
    scaler_predictions = None

    if use_all_features:
        scaler_res = build_scaler_datasets.build_datasets_using_scalers(
            train_features=train_features,
            test_feature=predict_feature,
            df=converted_df,
            test_size=use_test_size,
            seed=use_seed)
        if scaler_res['status'] != ae_consts.SUCCESS:
            log.error('failed to build scaler train and test datasets')
            return
        train_df = scaler_res['scaled_train_df']
        test_df = scaler_res['scaled_test_df']
        x_train = scaler_res['x_train']
        x_test = scaler_res['x_test']
        y_train = scaler_res['y_train']
        y_test = scaler_res['y_test']
        # kept to invert the normalization on predictions later
        scaler_predictions = scaler_res['scaler_test']
    else:
        log.info('building train and test dfs from subset of features')
        train_df = converted_df[train_features]
        test_df = converted_df[[predict_feature]]
        log.info(
            f'splitting {num_rows} into test and training '
            f'size={use_test_size}')
        (x_train,
         x_test,
         y_train,
         y_test) = tt_split.train_test_split(
             train_df,
             test_df,
             test_size=use_test_size,
             random_state=use_random_state)

    log.info(
        f'split breakdown - '
        f'x_train={len(x_train)} '
        f'x_test={len(x_test)} '
        f'y_train={len(y_train)} '
        f'y_test={len(y_test)}')

    def set_model():
        # build_fn for KerasRegressor - closes over the configs above
        return build_dnn.build_regression_dnn(
            num_features=num_features,
            compile_config=compile_config,
            model_config=dnn_config)

    estimator = keras_scikit.KerasRegressor(
        build_fn=set_model,
        epochs=use_epochs,
        batch_size=use_batch_size,
        verbose=model_verbose)

    log.info(
        f'fitting estimator - '
        f'predicting={predict_feature} '
        f'epochs={use_epochs} '
        f'batch={use_batch_size} '
        f'test_size={use_test_size} '
        f'seed={use_seed}')

    # NOTE(review): validation_data reuses the TRAINING split, so the
    # reported validation metrics are not out-of-sample; if that is
    # unintentional it should be (x_test, y_test) - confirm with the
    # original author before changing
    history = estimator.fit(
        x_train,
        y_train,
        validation_data=(x_train, y_train),
        epochs=use_epochs,
        batch_size=use_batch_size,
        shuffle=use_shuffle,
        verbose=fit_verbose)

    created_on = (
        datetime.datetime.now().strftime(
            ae_consts.COMMON_TICK_DATE_FORMAT))

    plot_fit_history.plot_dnn_fit_history(
        df=history.history,
        title=(
            f'DNN Errors Over Training Epochs\n'
            f'Training Data: s3://{s3_bucket}/{s3_key}\n'
            f'Created: {created_on}'),
        red='mean_squared_error',
        blue='mean_absolute_error',
        green='acc',
        orange='cosine_proximity',
        send_plots_to_slack=send_plots_to_slack)

    # on production use newly fetched pricing data
    # not the training data
    predict_records = []
    if use_all_features:
        prediction_res = build_scaler_df.build_scaler_dataset_from_df(
            df=converted_df[train_features])
        if prediction_res['status'] == ae_consts.SUCCESS:
            predict_records = prediction_res['df']
    else:
        predict_records = converted_df[train_features]

    log.info(f'making predictions: {len(predict_records)}')

    predictions = estimator.model.predict(
        predict_records,
        verbose=True)
    # fix: threshold=np.nan raises ValueError on numpy >= 1.14
    # ('threshold must be non-NAN') - np.inf keeps the original
    # intent of never summarizing printed arrays
    np.set_printoptions(threshold=np.inf)
    # NOTE(review): indexes/data are computed but never used below -
    # kept for backward compatibility, candidates for removal
    indexes = tf.argmax(predictions, axis=1)
    data = {}
    data['indexes'] = indexes
    price_predictions = []
    # fix: explicit None check instead of relying on the scaler
    # object's truthiness
    if use_all_features and scaler_predictions is not None:
        # undo the scaler normalization to recover real price values
        price_predictions = [
            ae_consts.to_f(x)
            for x in scaler_predictions.inverse_transform(
                predictions.reshape(-1, 1)).reshape(-1)]
    else:
        price_predictions = [ae_consts.to_f(x[0]) for x in predictions]

    timeseries_df[f'predicted_{predict_feature}'] = price_predictions
    timeseries_df['error'] = (
        timeseries_df[f'{predict_feature}'] -
        timeseries_df[f'predicted_{predict_feature}'])

    output_features = [
        'minute',
        f'{predict_feature}',
        f'predicted_{predict_feature}',
        'error'
    ]

    date_str = (
        f'Dates: {timeseries_df["minute"].iloc[0]} '
        f'to '
        f'{timeseries_df["minute"].iloc[-1]}')

    log.info(
        f'historical {predict_feature} with predicted {predict_feature}: '
        f'{timeseries_df[output_features]}')
    log.info(date_str)
    log.info(f'Columns: {output_features}')

    average_error = ae_consts.to_f(
        timeseries_df['error'].sum() / len(timeseries_df.index))
    log.info(
        f'Average historical {predict_feature} '
        f'vs predicted {predict_feature} error: '
        f'{average_error}')

    log.info(
        f'plotting historical {predict_feature} vs predicted {predict_feature}'
        f' from training with columns={num_features}')

    ts_filter = (timeseries_df[f'{predict_feature}'] > 0.1)
    latest_feature = (
        timeseries_df[ts_filter][f'{predict_feature}'].iloc[-1])
    latest_predicted_feature = (
        timeseries_df[ts_filter][f'predicted_{predict_feature}'].iloc[-1])
    log.info(
        f'{end_date} {predict_feature}={latest_feature} '
        f'with '
        f'predicted_{predict_feature}={latest_predicted_feature}')

    plot_trading_history.plot_trading_history(
        title=(
            f'{ticker} - Historical {predict_feature.title()} vs '
            f'Predicted {predict_feature.title()}\n'
            f'Number of Training Features: {num_features}\n'
            f'{date_str}'),
        df=timeseries_df,
        red=f'{predict_feature}',
        blue=f'predicted_{predict_feature}',
        green=None,
        orange=None,
        date_col='minute',
        date_format='%d %H:%M:%S\n%b',
        xlabel='minute',
        ylabel=(
            f'Historical {predict_feature.title()} vs '
            f'Predicted {predict_feature.title()}'),
        df_filter=ts_filter,
        width=8.0,
        height=8.0,
        show_plot=True,
        dropna_for_all=False,
        send_plots_to_slack=send_plots_to_slack)