def _create_dummy_controller(self, simulation_end, simulation_start, temp_dir,
                             calendar_name, oracle_config, scheduling_config):
    data_source_config = {
        "filename": TEST_HDF5FILE_NAME,
        "exchange": calendar_name,
        "start": datetime.datetime(1999, 1, 11, tzinfo=pytz.utc),
        "end": datetime.datetime(1999, 1, 11, tzinfo=pytz.utc)
    }
    datasource = DummyDataSource(data_source_config)

    oracle = ConstantOracle(
        calendar_name=calendar_name,
        oracle_configuration=oracle_config,
        scheduling_configuration=scheduling_config
    )

    scheduler = Scheduler(
        simulation_start,
        simulation_end,
        calendar_name,
        oracle.prediction_frequency,
        oracle.training_frequency
    )

    controller_configuration = {
        'start_date': simulation_start.strftime('%Y-%m-%d'),
        'end_date': simulation_end.strftime('%Y-%m-%d')
    }

    oracle_performance = OraclePerformance(
        temp_dir.name, 'test'
    )

    controller = Controller(
        configuration=controller_configuration,
        oracle=oracle,
        scheduler=scheduler,
        datasource=datasource,
        performance=oracle_performance
    )

    return controller
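# Hypothetical usage sketch (not part of the original suite): exercises the
# _create_dummy_controller helper above, reusing the ConstantOracle config
# shape from test_controller_with_xarray below. The test name, dates and
# config values here are illustrative assumptions, not part of the tested API.
def test_dummy_controller_runs(self):
    scheduling_config = {
        "prediction_frequency": {"frequency_type": "DAILY", "days_offset": 0, "minutes_offset": 15},
        "training_frequency": {"frequency_type": "WEEKLY", "days_offset": 0, "minutes_offset": 15},
    }
    oracle_config = {
        "prediction_horizon": {"unit": "days", "value": 10},
        "prediction_delta": {"unit": "days", "value": 10},
        "training_delta": {"unit": "days", "value": 20},
        "model": {
            "constant_variance": 0.1,
            "past_horizon": datetime.timedelta(days=7),
            "target_feature": "close"
        }
    }
    temp_dir = TemporaryDirectory()

    controller = self._create_dummy_controller(
        simulation_end=datetime.datetime(1999, 2, 10, tzinfo=pytz.utc),
        simulation_start=datetime.datetime(1999, 1, 10, tzinfo=pytz.utc),
        temp_dir=temp_dir,
        calendar_name="NYSE",
        oracle_config=oracle_config,
        scheduling_config=scheduling_config
    )
    controller.run()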
def test_controller_with_xarray(self):
    calendar_name = "NYSE"

    data_source_config = {
        "exchange": calendar_name,
        "data_timezone": "America/New_York",
        "filename": "tests/resources/19990101_19990301_3_stocks.nc",
        "start": datetime.datetime(2006, 12, 31, tzinfo=pytz.utc),
        "end": datetime.datetime(2011, 12, 31, tzinfo=pytz.utc)
    }
    datasource = XArrayDataSource(data_source_config)

    scheduling_configuration = {
        "prediction_frequency": {
            "frequency_type": "DAILY",
            "days_offset": 0,
            "minutes_offset": 15
        },
        "training_frequency": {
            "frequency_type": "WEEKLY",
            "days_offset": 0,
            "minutes_offset": 15
        },
    }

    oracle_config = {
        "prediction_horizon": {
            "unit": "days",
            "value": 10
        },
        "prediction_delta": {
            'unit': 'days',
            'value': 10
        },
        "training_delta": {
            'unit': 'days',
            'value': 20
        },
        "model": {
            "constant_variance": 0.1,
            "past_horizon": datetime.timedelta(days=7),
            "target_feature": "close"
        }
    }

    oracle = ConstantOracle(
        calendar_name=calendar_name,
        scheduling_configuration=scheduling_configuration,
        oracle_configuration=oracle_config
    )

    # These dates need to be within [start, end] of the data source
    simulation_start = datetime.datetime(1999, 1, 10, tzinfo=pytz.utc)
    simulation_end = datetime.datetime(1999, 2, 10, tzinfo=pytz.utc)

    scheduler = Scheduler(
        simulation_start,
        simulation_end,
        calendar_name,
        oracle.prediction_frequency,
        oracle.training_frequency
    )

    controller_configuration = {
        'start_date': simulation_start.strftime('%Y-%m-%d'),
        'end_date': simulation_end.strftime('%Y-%m-%d')
    }

    temp_dir = TemporaryDirectory()
    oracle_performance = OraclePerformance(
        temp_dir.name, 'test'
    )

    controller = Controller(
        configuration=controller_configuration,
        oracle=oracle,
        scheduler=scheduler,
        datasource=datasource,
        performance=oracle_performance
    )

    controller.run()

    # Check that the result files have been written
    assert len(glob.glob(temp_dir.name + "/*hdf5")) == 3
def test_run(self):
    oracle_configuration = {
        'prediction_delta': {
            'unit': 'days',
            'value': 10
        },
        'training_delta': {
            'unit': 'days',
            'value': 20
        },
        'prediction_horizon': {
            'unit': 'days',
            'value': 1
        },
        'data_transformation': {
            'feature_config_list': [
                {
                    'name': 'close',
                    'transformation': {'name': 'log-return'},
                    'normalization': 'standard',
                    'is_target': True,
                    'local': False,
                    'length': 5
                },
            ],
            'features_ndays': 10,
            'features_resample_minutes': 15,
            'fill_limit': 5,
        },
        "model": {
            'train_path': OUTPUT_DIR,
            'covariance_method': 'NERCOME',
            'covariance_ndays': 9,
            'model_save_path': OUTPUT_DIR,
            'tensorboard_log_path': OUTPUT_DIR,
            'd_type': 'float32',
            'tf_type': 32,
            'random_seed': 0,
            # Training specific
            'predict_single_shares': True,
            'n_epochs': 1,
            'n_retrain_epochs': 1,
            'learning_rate': 2e-3,
            'batch_size': 100,
            'cost_type': 'bayes',
            'n_train_passes': 30,
            'n_eval_passes': 100,
            'resume_training': False,
            'classify_per_series': False,
            'normalise_per_series': False,
            # Topology
            'n_series': 3,
            'n_assets': 3,
            'n_features_per_series': 271,
            'n_forecasts': 1,
            'n_classification_bins': 12,
            'layer_heights': [270, 270],
            'layer_widths': [3, 3],
            'activation_functions': ['relu', 'relu'],
            # Initial conditions
            'INITIAL_ALPHA': 0.2,
            'INITIAL_WEIGHT_UNCERTAINTY': 0.4,
            'INITIAL_BIAS_UNCERTAINTY': 0.4,
            'INITIAL_WEIGHT_DISPLACEMENT': 0.1,
            'INITIAL_BIAS_DISPLACEMENT': 0.4,
            'USE_PERFECT_NOISE': True,
            # Priors
            'double_gaussian_weights_prior': False,
            'wide_prior_std': 1.2,
            'narrow_prior_std': 0.05,
            'spike_slab_weighting': 0.5,
        },
        "universe": {
            "method": "liquidity",
            "n_assets": 3,
            "ndays_window": 5,
            "update_frequency": 'weekly',
            "avg_function": 'median',
            "dropna": False
        },
    }

    scheduling_configuration = {
        "prediction_frequency": {"frequency_type": "WEEKLY", "days_offset": 0, "minutes_offset": 75},
        "training_frequency": {"frequency_type": "WEEKLY", "days_offset": 0, "minutes_offset": 60}
    }

    oracle = MetaCrocubotOracle(
        calendar_name="NYSE",
        oracle_configuration=oracle_configuration,
        scheduling_configuration=scheduling_configuration
    )

    simulation_start = datetime.datetime(2009, 1, 5, tzinfo=pytz.utc)
    simulation_end = datetime.datetime(2009, 1, 31, tzinfo=pytz.utc)
    calendar_name = 'NYSE'

    scheduler = Scheduler(
        simulation_start,
        simulation_end,
        calendar_name,
        oracle.prediction_frequency,
        oracle.training_frequency,
    )

    oracle_performance = OraclePerformance(OUTPUT_DIR, 'oracle')

    datasource = SyntheticDataSource({
        "start_date": datetime.datetime(2006, 12, 31),
        "end_date": datetime.datetime(2011, 12, 31),
        "n_sin_series": 3
    })

    controller = Controller(
        configuration={
            'start_date': '2009-01-05',
            'end_date': '2009-01-31'
        },
        oracle=oracle,
        scheduler=scheduler,
        datasource=datasource,
        performance=oracle_performance
    )

    controller.run()

    expected_files = [
        "oracle_correlation_coefficient.pdf",
        "oracle_cumulative_returns.pdf",
        "oracle_data_table.csv",
        "oracle_oracle_results_actuals.hdf5",
        "oracle_oracle_results_covariance_matrix.hdf5",
        "oracle_oracle_results_mean_vector.hdf5",
        "oracle_performance_table.csv",
        "time-series-plot.pdf"
    ]
    for filename in expected_files:
        self.assertTrue(os.path.isfile(os.path.join(OUTPUT_DIR, filename)))
"training_frequency": {"frequency_type": "MONTHLY", "days_offset": -1, "minutes_offset": 0} } oracle = MetaCrocubotOracle( calendar_name=CALENDAR_NAME, oracle_configuration=oracle_configuration, scheduling_configuration=scheduling_configuration ) simulation_start = datetime.datetime(2007, 11, 30, tzinfo=pytz.utc) simulation_end = datetime.datetime(2008, 11, 30, tzinfo=pytz.utc) scheduler = Scheduler( simulation_start, simulation_end, CALENDAR_NAME, oracle.prediction_frequency, oracle.training_frequency, ) oracle_performance = OraclePerformance(OUTPUT_DIR, 'oracle') datasource = DataSource({ 'data_file': os.path.join(RESOURCES_DIR, 'test_stock_data.hdf5') }) controller = Controller( configuration={ 'start_date': simulation_start.strftime('%Y-%m-%d'), 'end_date': simulation_end.strftime('%Y-%m-%d') },
def test_run(self):
    oracle_configuration = {
        'prediction_delta': {
            'unit': 'days',
            'value': 3
        },
        'training_delta': {
            'unit': 'days',
            'value': 12
        },
        'prediction_horizon': {
            'unit': 'days',
            'value': 1
        },
        'data_transformation': {
            'feature_config_list': [
                {
                    'name': 'Returns',
                    'transformation': {'name': 'value'},
                    'normalization': None,
                    'is_target': True,
                    'local': False,
                    'length': 5
                },
            ],
            'features_ndays': 5,
            'features_resample_minutes': 15,
            'fill_limit': 5,
        },
        "model": {
            'train_path': OUTPUT_DIR,
            'covariance_method': 'NERCOME',
            'covariance_ndays': 9,
            'model_save_path': OUTPUT_DIR,
            'tensorboard_log_path': OUTPUT_DIR,
            'd_type': 'float32',
            'tf_type': 32,
            'random_seed': 0,
            # Training specific
            'predict_single_shares': True,
            'n_epochs': 1,
            'n_retrain_epochs': 1,
            'learning_rate': 2e-3,
            'batch_size': 100,
            'cost_type': 'bayes',
            'n_train_passes': 30,
            'n_eval_passes': 100,
            'resume_training': False,
            'classify_per_series': False,
            'normalise_per_series': False,
            # Topology
            'n_series': 324,
            'n_assets': 324,
            'n_correlated_series': 1,
            'n_features_per_series': 271,
            'n_forecasts': 1,
            'n_classification_bins': 12,
            'layer_heights': [270, 270],
            'layer_widths': [3, 3],
            'activation_functions': ['relu', 'relu'],
            # Initial conditions
            'INITIAL_ALPHA': 0.2,
            'INITIAL_WEIGHT_UNCERTAINTY': 0.4,
            'INITIAL_BIAS_UNCERTAINTY': 0.4,
            'INITIAL_WEIGHT_DISPLACEMENT': 0.1,
            'INITIAL_BIAS_DISPLACEMENT': 0.4,
            'USE_PERFECT_NOISE': True,
            # Priors
            'double_gaussian_weights_prior': False,
            'wide_prior_std': 1.2,
            'narrow_prior_std': 0.05,
            'spike_slab_weighting': 0.5,
        }
    }

    scheduling_configuration = {
        "prediction_frequency": {
            "frequency_type": "MONTHLY",
            "days_offset": -1,
            "minutes_offset": 0
        },
        "training_frequency": {
            "frequency_type": "MONTHLY",
            "days_offset": -1,
            "minutes_offset": 0
        }
    }

    oracle = MetaCrocubotOracle(
        calendar_name=CALENDAR_NAME,
        oracle_configuration=oracle_configuration,
        scheduling_configuration=scheduling_configuration
    )

    simulation_start = datetime.datetime(2007, 12, 31, tzinfo=pytz.utc)
    simulation_end = datetime.datetime(2008, 12, 31, tzinfo=pytz.utc)

    scheduler = Scheduler(
        simulation_start,
        simulation_end,
        CALENDAR_NAME,
        oracle.prediction_frequency,
        oracle.training_frequency,
    )

    oracle_performance = OraclePerformance(OUTPUT_DIR, 'oracle')

    datasource = DataSource(
        {'data_file': os.path.join(RESOURCES_DIR, 'test_stock_data.hdf5')}
    )

    controller = Controller(
        configuration={
            'start_date': simulation_start.strftime('%Y-%m-%d'),
            'end_date': simulation_end.strftime('%Y-%m-%d')
        },
        oracle=oracle,
        scheduler=scheduler,
        datasource=datasource,
        performance=oracle_performance
    )

    controller.run()

    expected_files = [
        "oracle_correlation_coefficient.pdf",
        "oracle_cumulative_returns.pdf",
        "oracle_data_table.csv",
        "oracle_oracle_results_actuals.hdf5",
        "oracle_oracle_results_covariance_matrix.hdf5",
        "oracle_oracle_results_mean_vector.hdf5",
        "oracle_performance_table.csv",
        "time-series-plot.pdf"
    ]
    for filename in expected_files:
        self.assertTrue(os.path.isfile(os.path.join(OUTPUT_DIR, filename)))