Exemple #1
0
    def _create_dummy_controller(self, simulation_end, simulation_start, temp_dir, calendar_name,
                                 oracle_config,
                                 scheduling_config):
        """Assemble a Controller backed by a dummy data source and constant oracle.

        Test helper: wires together the data source, oracle, scheduler and
        performance writer, then returns the fully-configured Controller.
        Performance output goes into *temp_dir* under the 'test' prefix.
        """
        # Dummy HDF5-backed source with a fixed one-day window.
        source_settings = {
            "filename": TEST_HDF5FILE_NAME,
            "exchange": calendar_name,
            "start": datetime.datetime(1999, 1, 11, tzinfo=pytz.utc),
            "end": datetime.datetime(1999, 1, 11, tzinfo=pytz.utc),
        }
        dummy_source = DummyDataSource(source_settings)

        constant_oracle = ConstantOracle(calendar_name=calendar_name,
                                         oracle_configuration=oracle_config,
                                         scheduling_configuration=scheduling_config)

        # The scheduler follows the oracle's own prediction/training cadence.
        run_scheduler = Scheduler(simulation_start, simulation_end, calendar_name,
                                  constant_oracle.prediction_frequency,
                                  constant_oracle.training_frequency)

        performance_writer = OraclePerformance(temp_dir.name, 'test')

        return Controller(
            configuration={
                'start_date': simulation_start.strftime('%Y-%m-%d'),
                'end_date': simulation_end.strftime('%Y-%m-%d'),
            },
            oracle=constant_oracle,
            scheduler=run_scheduler,
            datasource=dummy_source,
            performance=performance_writer,
        )
Exemple #2
0
    def test_controller_with_xarray(self):
        """End-to-end Controller run against an xarray-backed data source.

        Builds a ConstantOracle and Scheduler over a one-month simulation
        window in January/February 1999 and asserts that the run produces
        three HDF5 result files in the performance output directory.
        """
        calendar_name = "NYSE"
        # BUGFIX: the data-source [start, end] window must contain the
        # simulation dates below (see comment there). The previous
        # 2006-12-31..2011-12-31 window excluded the 1999 simulation period
        # and disagreed with the resource file, which (per its name) spans
        # 1999-01-01 .. 1999-03-01.
        data_source_config = {
            "exchange": calendar_name,
            "data_timezone": "America/New_York",
            "filename": "tests/resources/19990101_19990301_3_stocks.nc",
            "start": datetime.datetime(1999, 1, 1, tzinfo=pytz.utc),
            "end": datetime.datetime(1999, 3, 1, tzinfo=pytz.utc)
        }
        datasource = XArrayDataSource(data_source_config)

        scheduling_configuration = {
            "prediction_frequency":
                {
                    "frequency_type": "DAILY",
                    "days_offset": 0,
                    "minutes_offset": 15
                },

            "training_frequency":
                {
                    "frequency_type": "WEEKLY",
                    "days_offset": 0,
                    "minutes_offset": 15
                },
        }
        oracle_config = {
            "prediction_horizon": {
                "unit": "days",
                "value": 10
            },
            "prediction_delta": {
                'unit': 'days',
                'value': 10
            },
            "training_delta": {
                'unit': 'days',
                'value': 20
            },
            "model": {
                "constant_variance": 0.1,
                "past_horizon": datetime.timedelta(days=7),
                "target_feature": "close"
            }
        }

        oracle = ConstantOracle(
            calendar_name=calendar_name,
            scheduling_configuration=scheduling_configuration,
            oracle_configuration=oracle_config
        )

        # these dates need to be within [start, end] of the data source
        simulation_start = datetime.datetime(1999, 1, 10, tzinfo=pytz.utc)
        simulation_end = datetime.datetime(1999, 2, 10, tzinfo=pytz.utc)
        scheduler = Scheduler(simulation_start,
                              simulation_end,
                              calendar_name,
                              oracle.prediction_frequency,
                              oracle.training_frequency
                              )

        controller_configuration = {
            'start_date': simulation_start.strftime('%Y-%m-%d'),
            'end_date': simulation_end.strftime('%Y-%m-%d')
        }

        # BUGFIX: use the context manager so the temporary directory is
        # cleaned up even if the run raises (it previously leaked).
        with TemporaryDirectory() as temp_dir_name:
            oracle_performance = OraclePerformance(
                temp_dir_name, 'test'
            )

            controller = Controller(
                configuration=controller_configuration,
                oracle=oracle,
                scheduler=scheduler,
                datasource=datasource,
                performance=oracle_performance
            )

            controller.run()

            # Check if files have been written
            assert len(glob.glob(temp_dir_name + "/*hdf5")) == 3
Exemple #3
0
    def test_run(self):
        """Run a full MetaCrocubotOracle simulation and check the output files.

        Configures a small Crocubot model over synthetic sine-wave data,
        simulates January 2009 on the NYSE calendar with weekly prediction
        and training, then asserts every expected report file exists in
        OUTPUT_DIR.
        """
        exchange_calendar = 'NYSE'

        oracle_cfg = {
            'prediction_delta': {'unit': 'days', 'value': 10},
            'training_delta': {'unit': 'days', 'value': 20},
            'prediction_horizon': {'unit': 'days', 'value': 1},
            'data_transformation': {
                'feature_config_list': [
                    {
                        'name': 'close',
                        'transformation': {'name': 'log-return'},
                        'normalization': 'standard',
                        'is_target': True,
                        'local': False,
                        'length': 5,
                    },
                ],
                'features_ndays': 10,
                'features_resample_minutes': 15,
                'fill_limit': 5,
            },
            "model": {
                'train_path': OUTPUT_DIR,
                'covariance_method': 'NERCOME',
                'covariance_ndays': 9,
                'model_save_path': OUTPUT_DIR,
                'tensorboard_log_path': OUTPUT_DIR,
                'd_type': 'float32',
                'tf_type': 32,
                'random_seed': 0,
                # Training specific
                'predict_single_shares': True,
                'n_epochs': 1,
                'n_retrain_epochs': 1,
                'learning_rate': 2e-3,
                'batch_size': 100,
                'cost_type': 'bayes',
                'n_train_passes': 30,
                'n_eval_passes': 100,
                'resume_training': False,
                'classify_per_series': False,
                'normalise_per_series': False,
                # Topology
                'n_series': 3,
                'n_assets': 3,
                'n_features_per_series': 271,
                'n_forecasts': 1,
                'n_classification_bins': 12,
                'layer_heights': [270, 270],
                'layer_widths': [3, 3],
                'activation_functions': ['relu', 'relu'],
                # Initial conditions
                'INITIAL_ALPHA': 0.2,
                'INITIAL_WEIGHT_UNCERTAINTY': 0.4,
                'INITIAL_BIAS_UNCERTAINTY': 0.4,
                'INITIAL_WEIGHT_DISPLACEMENT': 0.1,
                'INITIAL_BIAS_DISPLACEMENT': 0.4,
                'USE_PERFECT_NOISE': True,
                # Priors
                'double_gaussian_weights_prior': False,
                'wide_prior_std': 1.2,
                'narrow_prior_std': 0.05,
                'spike_slab_weighting': 0.5,
            },
            "universe": {
                "method": "liquidity",
                "n_assets": 3,
                "ndays_window": 5,
                "update_frequency": 'weekly',
                "avg_function": 'median',
                "dropna": False,
            },
        }

        # Predict 75 minutes after the weekly open; retrain 60 minutes after.
        schedule_cfg = {
            "prediction_frequency": {"frequency_type": "WEEKLY", "days_offset": 0, "minutes_offset": 75},
            "training_frequency": {"frequency_type": "WEEKLY", "days_offset": 0, "minutes_offset": 60},
        }

        meta_oracle = MetaCrocubotOracle(
            calendar_name=exchange_calendar,
            oracle_configuration=oracle_cfg,
            scheduling_configuration=schedule_cfg,
        )

        sim_start = datetime.datetime(2009, 1, 5, tzinfo=pytz.utc)
        sim_end = datetime.datetime(2009, 1, 31, tzinfo=pytz.utc)

        run_scheduler = Scheduler(sim_start, sim_end, exchange_calendar,
                                  meta_oracle.prediction_frequency,
                                  meta_oracle.training_frequency)

        perf_writer = OraclePerformance(OUTPUT_DIR, 'oracle')

        # Synthetic sine-wave price series stand in for real market data.
        synthetic_source = SyntheticDataSource({
            "start_date": datetime.datetime(2006, 12, 31),
            "end_date": datetime.datetime(2011, 12, 31),
            "n_sin_series": 3,
        })

        sim_controller = Controller(
            configuration={'start_date': '2009-01-05', 'end_date': '2009-01-31'},
            oracle=meta_oracle,
            scheduler=run_scheduler,
            datasource=synthetic_source,
            performance=perf_writer,
        )

        sim_controller.run()

        expected_outputs = [
            "oracle_correlation_coefficient.pdf",
            "oracle_cumulative_returns.pdf",
            "oracle_data_table.csv",
            "oracle_oracle_results_actuals.hdf5",
            "oracle_oracle_results_covariance_matrix.hdf5",
            "oracle_oracle_results_mean_vector.hdf5",
            "oracle_performance_table.csv",
            "time-series-plot.pdf",
        ]

        for output_name in expected_outputs:
            self.assertTrue(os.path.isfile(os.path.join(OUTPUT_DIR, output_name)))
Exemple #4
0
# Simulate one year of trading, 2007-11-30 through 2008-11-30 (UTC-aware).
simulation_start = datetime.datetime(2007, 11, 30, tzinfo=pytz.utc)
simulation_end = datetime.datetime(2008, 11, 30, tzinfo=pytz.utc)

# The scheduler drives the simulation clock from the oracle's own
# prediction/training cadence (`oracle` and CALENDAR_NAME are defined
# earlier in the file).
scheduler = Scheduler(
    simulation_start,
    simulation_end,
    CALENDAR_NAME,
    oracle.prediction_frequency,
    oracle.training_frequency,
)

# Performance artifacts are written under OUTPUT_DIR with the 'oracle' prefix.
oracle_performance = OraclePerformance(OUTPUT_DIR, 'oracle')

# HDF5-backed price data shipped with the test resources.
datasource = DataSource({
    'data_file': os.path.join(RESOURCES_DIR, 'test_stock_data.hdf5')
})

# Controller configuration takes ISO-formatted date strings.
controller = Controller(
    configuration={
        'start_date': simulation_start.strftime('%Y-%m-%d'),
        'end_date': simulation_end.strftime('%Y-%m-%d')
    },
    oracle=oracle,
    scheduler=scheduler,
    datasource=datasource,
    performance=oracle_performance
)

controller.run()
Exemple #5
0
    def test_run(self):
        """Run a MetaCrocubotOracle simulation over 2008 and verify its outputs.

        Configures a Crocubot model for 324 series over HDF5 stock data,
        simulates calendar year 2008 with month-end prediction and training,
        then asserts every expected report file exists in OUTPUT_DIR.
        """
        oracle_cfg = {
            'prediction_delta': {'unit': 'days', 'value': 3},
            'training_delta': {'unit': 'days', 'value': 12},
            'prediction_horizon': {'unit': 'days', 'value': 1},
            'data_transformation': {
                'feature_config_list': [
                    {
                        'name': 'Returns',
                        'transformation': {'name': 'value'},
                        'normalization': None,
                        'is_target': True,
                        'local': False,
                        'length': 5,
                    },
                ],
                'features_ndays': 5,
                'features_resample_minutes': 15,
                'fill_limit': 5,
            },
            "model": {
                'train_path': OUTPUT_DIR,
                'covariance_method': 'NERCOME',
                'covariance_ndays': 9,
                'model_save_path': OUTPUT_DIR,
                'tensorboard_log_path': OUTPUT_DIR,
                'd_type': 'float32',
                'tf_type': 32,
                'random_seed': 0,
                # Training specific
                'predict_single_shares': True,
                'n_epochs': 1,
                'n_retrain_epochs': 1,
                'learning_rate': 2e-3,
                'batch_size': 100,
                'cost_type': 'bayes',
                'n_train_passes': 30,
                'n_eval_passes': 100,
                'resume_training': False,
                'classify_per_series': False,
                'normalise_per_series': False,
                # Topology
                'n_series': 324,
                'n_assets': 324,
                'n_correlated_series': 1,
                'n_features_per_series': 271,
                'n_forecasts': 1,
                'n_classification_bins': 12,
                'layer_heights': [270, 270],
                'layer_widths': [3, 3],
                'activation_functions': ['relu', 'relu'],
                # Initial conditions
                'INITIAL_ALPHA': 0.2,
                'INITIAL_WEIGHT_UNCERTAINTY': 0.4,
                'INITIAL_BIAS_UNCERTAINTY': 0.4,
                'INITIAL_WEIGHT_DISPLACEMENT': 0.1,
                'INITIAL_BIAS_DISPLACEMENT': 0.4,
                'USE_PERFECT_NOISE': True,
                # Priors
                'double_gaussian_weights_prior': False,
                'wide_prior_std': 1.2,
                'narrow_prior_std': 0.05,
                'spike_slab_weighting': 0.5,
            },
        }

        # days_offset=-1 means the last trading day of each month.
        schedule_cfg = {
            "prediction_frequency": {
                "frequency_type": "MONTHLY",
                "days_offset": -1,
                "minutes_offset": 0,
            },
            "training_frequency": {
                "frequency_type": "MONTHLY",
                "days_offset": -1,
                "minutes_offset": 0,
            },
        }

        meta_oracle = MetaCrocubotOracle(
            calendar_name=CALENDAR_NAME,
            oracle_configuration=oracle_cfg,
            scheduling_configuration=schedule_cfg,
        )

        sim_start = datetime.datetime(2007, 12, 31, tzinfo=pytz.utc)
        sim_end = datetime.datetime(2008, 12, 31, tzinfo=pytz.utc)

        run_scheduler = Scheduler(sim_start, sim_end, CALENDAR_NAME,
                                  meta_oracle.prediction_frequency,
                                  meta_oracle.training_frequency)

        perf_writer = OraclePerformance(OUTPUT_DIR, 'oracle')

        # HDF5-backed price data shipped with the test resources.
        stock_source = DataSource({
            'data_file': os.path.join(RESOURCES_DIR, 'test_stock_data.hdf5')
        })

        sim_controller = Controller(
            configuration={
                'start_date': sim_start.strftime('%Y-%m-%d'),
                'end_date': sim_end.strftime('%Y-%m-%d'),
            },
            oracle=meta_oracle,
            scheduler=run_scheduler,
            datasource=stock_source,
            performance=perf_writer,
        )

        sim_controller.run()

        expected_outputs = [
            "oracle_correlation_coefficient.pdf",
            "oracle_cumulative_returns.pdf",
            "oracle_data_table.csv",
            "oracle_oracle_results_actuals.hdf5",
            "oracle_oracle_results_covariance_matrix.hdf5",
            "oracle_oracle_results_mean_vector.hdf5",
            "oracle_performance_table.csv",
            "time-series-plot.pdf",
        ]

        for output_name in expected_outputs:
            self.assertTrue(os.path.isfile(os.path.join(OUTPUT_DIR, output_name)))