Esempio n. 1
0
def test_create_partitioner(tmpdir, monkeypatch):
    monkeypatch.chdir(tmpdir)
    params = dict(
        type="CrossValidationPartitioner",
        num_folds=4,
    )
    path = pathlib.Path("params/partitioners/foo.yaml")
    path.parent.mkdir(parents=True)
    path.write_text(yaml.safe_dump(params))

    db = Database(partitioner="foo")
    partitioner = db.create_partitioner()
    assert isinstance(partitioner, CrossValidationPartitioner)
Esempio n. 2
0
def test_dump_estimator_params(tmpdir, monkeypatch):
    monkeypatch.chdir(tmpdir)
    params = dict(
        type="STLQFCNNEstimator",
        time_horizon=Reference("time_horizon"),
        quantile_levels=Reference("quantile_levels"),
        history_shape=[7, 24],
    )
    db = Database(estimator="foo")
    db.dump_estimator_params(params)

    path = pathlib.Path("params/estimators/foo.yaml")
    observed_params = yaml.load(path.read_text(), ReferenceLoader)

    assert observed_params == params
Esempio n. 3
0
def test_build_estimator_params():
    optimizer = STLQFCNNHyperparameterOptimizer(
        db=Database(),
        time_horizon="time_horizon",
        quantile_levels="quantile_levels",
        max_evals=1000,
        space=None,
    )
    params = optimizer.build_estimator_params(
        history_shape=[7, 24],
        conv_layers_params=[
            dict(filter_shape=(3, 5), num_filters=25),
        ],
        fc_layers_params=[
            dict(num_outpus=64, activation="relu"),
            dict(),
        ],
    )
    assert params == dict(
        type="STLQFCNNEstimator",
        time_horizon=Reference("time_horizon"),
        quantile_levels=Reference("quantile_levels"),
        history_shape=[7, 24],
        conv_layers_params=[
            dict(filter_shape=(3, 5), num_filters=25),
        ],
        fc_layers_params=[
            dict(num_outpus=64, activation="relu"),
            dict(),
        ],
    )
Esempio n. 4
0
def test_dump_issue_times(tmpdir, monkeypatch):
    monkeypatch.chdir(tmpdir)
    issue_times = pd.date_range("2010-01-01", "2010-01-30") | pd.date_range(
        "2010-03-02", "2010-03-31")
    db = Database(issue_times="foo")
    db.dump_issue_times(issue_times)

    path = pathlib.Path("params/issue_times/foo.yaml")
    params = yaml.safe_load(path.read_text())
    assert params == dict(
        freq="D",
        issue_times=[
            dict(start=datetime.datetime(2010, 1, 1),
                 end=datetime.datetime(2010, 1, 30)),
            dict(start=datetime.datetime(2010, 3, 2),
                 end=datetime.datetime(2010, 3, 31)),
        ],
    )
Esempio n. 5
0
def test_create_data_reader(tmpdir, monkeypatch):
    monkeypatch.chdir(tmpdir)
    params = dict(
        type="PecanStreetReader",
        base_path="data/pecanstreet",
        city="austin",
        resolution="15min",
    )
    path = pathlib.Path("params/data_readers/foo.yaml")
    path.parent.mkdir(parents=True)
    path.write_text(yaml.safe_dump(params))

    db = Database(data_reader="foo")
    data_reader = db.create_data_reader()
    assert isinstance(data_reader, PecanStreetReader)
    assert data_reader.base_path == pathlib.Path("data/pecanstreet")
    assert data_reader.city == "austin"
    assert data_reader.resolution == "15min"
    assert data_reader.aggregation == 1
    assert data_reader.columns is None
Esempio n. 6
0
def test_dump_partitions(tmpdir, monkeypatch):
    monkeypatch.chdir(tmpdir)
    issue_times1 = pd.date_range("2010-01-01", "2010-01-30") | pd.date_range(
        "2010-03-02", "2010-03-31")
    issue_times2 = pd.date_range("2010-01-31", "2010-03-01") | pd.date_range(
        "2010-04-01", "2010-04-30")
    partitions = [
        IssueTimesPartition(train=issue_times1, test=issue_times2),
        IssueTimesPartition(train=issue_times2, test=issue_times1),
    ]
    db = Database(partitions="foo")
    db.dump_partitions(partitions)

    path = pathlib.Path("params/partitions/foo.yaml")
    params = yaml.safe_load(path.read_text())

    params_issue_times1 = [
        dict(start=datetime.datetime(2010, 1, 1),
             end=datetime.datetime(2010, 1, 30)),
        dict(start=datetime.datetime(2010, 3, 2),
             end=datetime.datetime(2010, 3, 31)),
    ]
    params_issue_times2 = [
        dict(start=datetime.datetime(2010, 1, 31),
             end=datetime.datetime(2010, 3, 1)),
        dict(start=datetime.datetime(2010, 4, 1),
             end=datetime.datetime(2010, 4, 30)),
    ]
    assert params == dict(
        freq="D",
        partitions=[
            dict(
                train=params_issue_times1,
                test=params_issue_times2,
            ),
            dict(
                train=params_issue_times2,
                test=params_issue_times1,
            ),
        ],
    )
Esempio n. 7
0
def test_load_issue_times(tmpdir, monkeypatch):
    monkeypatch.chdir(tmpdir)
    params = dict(
        freq="1d",
        issue_times=[
            dict(start=datetime.date(2010, 1, 1),
                 end=datetime.date(2010, 1, 30)),
            dict(start=datetime.date(2010, 3, 2),
                 end=datetime.date(2010, 3, 31)),
        ],
    )
    path = pathlib.Path("params/issue_times/foo.yaml")
    path.parent.mkdir(parents=True)
    path.write_text(yaml.safe_dump(params))

    db = Database(issue_times="foo")
    issue_times = db.load_issue_times()
    assert isinstance(issue_times, pd.DatetimeIndex)
    assert list(issue_times) == list(
        pd.date_range("2010-01-01", "2010-01-30")
        | pd.date_range("2010-03-02", "2010-03-31"))
Esempio n. 8
0
def test_create_hyperparameter_optimizer(tmpdir, monkeypatch):
    monkeypatch.chdir(tmpdir)
    path = pathlib.Path("params/hyperopt/foo.yaml")
    path.parent.mkdir(parents=True)
    path.write_text("\n".join([
        "type: STLQFCNNHyperparameterOptimizer",
        "time_horizon: time_horizon",
        "quantile_levels: quantile_levels",
        "max_evals: 1000",
        "space: !hp.uniform [0, 5]",
    ]))

    db = Database(hyperopt="foo")
    optimizer = db.create_hyperparameter_optimizer()
    assert isinstance(optimizer, STLQFCNNHyperparameterOptimizer)

    path.write_text("\n".join([
        "type: STLQFCNNHyperparameterOptimizer",
        "time_horizon: time_horizon",
        "quantile_levels: quantile_levels",
        "max_evals: 1000",
        "space: !hp.uniform {low: 0, high: 5}",
    ]))

    db = Database(hyperopt="foo")
    optimizer = db.create_hyperparameter_optimizer()
    assert isinstance(optimizer, STLQFCNNHyperparameterOptimizer)
Esempio n. 9
0
def test_load_partitions(tmpdir, monkeypatch):
    monkeypatch.chdir(tmpdir)
    params_issue_times1 = [
        dict(start=datetime.date(2010, 1, 1), end=datetime.date(2010, 1, 30)),
        dict(start=datetime.date(2010, 3, 2), end=datetime.date(2010, 3, 31)),
    ]
    params_issue_times2 = [
        dict(start=datetime.date(2010, 1, 31), end=datetime.date(2010, 3, 1)),
        dict(start=datetime.date(2010, 4, 1), end=datetime.date(2010, 4, 30)),
    ]
    params = dict(
        freq="1d",
        partitions=[
            dict(
                train=copy.deepcopy(params_issue_times1),
                test=copy.deepcopy(params_issue_times2),
            ),
            dict(
                train=copy.deepcopy(params_issue_times2),
                test=copy.deepcopy(params_issue_times1),
            ),
        ],
    )
    path = pathlib.Path("params/partitions/foo.yaml")
    path.parent.mkdir(parents=True)
    path.write_text(yaml.safe_dump(params))

    db = Database(partitions="foo")
    partitions = db.load_partitions()
    assert len(partitions) == 2
    issue_times1 = list(
        pd.date_range("2010-01-01", "2010-01-30")
        | pd.date_range("2010-03-02", "2010-03-31"))
    issue_times2 = list(
        pd.date_range("2010-01-31", "2010-03-01")
        | pd.date_range("2010-04-01", "2010-04-30"))
    assert list(partitions[0].train) == issue_times1
    assert list(partitions[0].test) == issue_times2
    assert list(partitions[1].train) == issue_times2
    assert list(partitions[1].test) == issue_times1
Esempio n. 10
0
def test_paths():
    # test for exceptions
    db = Database()
    pytest.raises(AttributeError, getattr, db, "issue_times_params_path")
    pytest.raises(AttributeError, getattr, db, "partitioner_params_path")
    pytest.raises(AttributeError, getattr, db, "partitions_params_path")
    pytest.raises(AttributeError, getattr, db, "data_reader_params_path")
    pytest.raises(AttributeError, getattr, db, "estimator_params_path")
    pytest.raises(AttributeError, getattr, db, "time_horizon_params_path")
    pytest.raises(AttributeError, getattr, db, "quantile_levels_params_path")
    pytest.raises(AttributeError, getattr, db, "model_path")
    pytest.raises(AttributeError, getattr, db, "model_estimator_path")
    pytest.raises(AttributeError, getattr, db, "model_partition_path")
    pytest.raises(AttributeError, getattr, db, "predictions_path")
    pytest.raises(AttributeError, getattr, db, "predictions_pickle_path")
Esempio n. 11
0
def test_create_estimator(tmpdir, monkeypatch):
    monkeypatch.chdir(tmpdir)
    params = dict(
        type="NaiveEstimator",
        time_horizon=dict(start=0, stop=600, step=60),
        quantile_levels=dict(numerators=[1, 2, 8, 9], denominator=10),
        load_offset=1440,
        noise_type="multiplicative",
    )
    path = pathlib.Path("params/estimators/foo.yaml")
    path.parent.mkdir(parents=True)
    path.write_text(yaml.safe_dump(params))

    db = Database(estimator="foo")
    estimator = db.create_estimator()
    assert isinstance(estimator, NaiveEstimator)
    assert estimator.time_horizon == TimeHorizon(0, 600, 60)
    assert estimator.quantile_levels == QuantileLevels([1, 2, 8, 9], 10)
    assert estimator.load_offset == 1440
    assert estimator.noise_type == "multiplicative"

    path = pathlib.Path("params/estimators/bar.yaml")
    path.write_text("\n".join([
        "type: NaiveEstimator",
        "time_horizon: !ref bar_horizon",
        "quantile_levels: !ref bar_quantiles",
        "load_offset: 1440",
        "noise_type: multiplicative",
    ]))
    path = pathlib.Path("params/time_horizons/bar_horizon.yaml")
    path.parent.mkdir(parents=True)
    path.write_text("\n".join([
        "start: 0",
        "stop: 1440",
        "step: 60",
    ]))
    path = pathlib.Path("params/quantile_levels/bar_quantiles.yaml")
    path.parent.mkdir(parents=True)
    path.write_text("\n".join([
        "[1, 5, 10, 25, 50, 75, 90, 95, 99]",
    ]))

    db = Database(estimator="bar")
    estimator = db.create_estimator()
    assert isinstance(estimator, NaiveEstimator)
    assert estimator.time_horizon == TimeHorizon(0, 1440, 60)
    assert estimator.quantile_levels == QuantileLevels(
        [1, 5, 10, 25, 50, 75, 90, 95, 99])
    assert estimator.load_offset == 1440
    assert estimator.noise_type == "multiplicative"
Esempio n. 12
0
def test_warm_start(tmpdir, monkeypatch, mocker):
    monkeypatch.chdir(tmpdir)
    mocker.patch.object(cli, "f_train")
    mocker.patch.object(STLQFCNNHyperparameterOptimizer, "calculate_loss_and_duration", return_value=(0, 0, 0))
    mocker.patch.object(Database, "dump_estimator_params")
    db = Database(hyperopt="test")
    space = dict(
        history_shape=[hp.choice("a", [7, 14]), 24],
        learning_rate=hp.loguniform("lr", -13, -4),
        conv_layers_params=[
            dict(
                filter_shape=[hp.choice("b", [1, 3, 5]), hp.choice("c", [3, 5, 7, 9])],
                num_filters=hp.choice("d", [4, 8, 16, 32, 64]),
            ),
        ],
        fc_layers_params=[
            dict(num_outpus=64, activation="relu"),
            dict(),
        ],
    )
    optimizer = STLQFCNNHyperparameterOptimizer(
        db=db,
        time_horizon="time_horizon",
        quantile_levels="quantile_levels",
        max_evals=1,
        space=space,
    )
    f = io.StringIO()
    with redirect_stdout(f):
        for _ in optimizer.optimize():
            pass
    assert cli.f_train.call_count == 1
    assert len(optimizer.trials) == 1

    cli.f_train.reset_mock()
    optimizer.max_evals = optimizer.overall_max_evals = 4
    with redirect_stdout(f):
        for _ in optimizer.optimize():
            pass
    assert cli.f_train.call_count == 3
    assert len(optimizer.trials) == 4
Esempio n. 13
0
def test_apply():
    db = Database()
    assert not hasattr(db, "data_reader")
    assert not hasattr(db, "estimator")
    assert not hasattr(db, "issue_times")
    assert not hasattr(db, "model")
    assert not hasattr(db, "partition_number")
    assert not hasattr(db, "partitioner")
    assert not hasattr(db, "partitions")
    assert not hasattr(db, "quantile_levels")
    assert not hasattr(db, "time_horizon")
    db.apply(estimator="estimator",
             data_reader="data_reader",
             partitions="partitions")
    assert hasattr(db, "estimator")
    assert hasattr(db, "data_reader")
    assert hasattr(db, "partitions")
    assert hasattr(db, "model")
    with pytest.raises(ValueError) as e:
        db.apply(foo="bar")
    assert str(e.value) == "some keys do not exist: foo"
Esempio n. 14
0
def test_optimize(tmpdir, monkeypatch, mocker):
    orig_build_estimator_params = STLQFCNNHyperparameterOptimizer.build_estimator_params

    def mock_build_estimator_params(self, **kwargs):
        if fail_predicate():
            raise BadEstimatorParams
        return orig_build_estimator_params(self, **kwargs)

    monkeypatch.chdir(tmpdir)
    mocker.patch.object(cli, "f_train")
    mocker.patch.object(STLQFCNNHyperparameterOptimizer, "calculate_loss_and_duration", return_value=(0, 0, 0))
    monkeypatch.setattr(STLQFCNNHyperparameterOptimizer, "build_estimator_params", mock_build_estimator_params)
    mocker.patch.object(Database, "dump_estimator_params")
    db = Database(hyperopt="test")
    space = dict(
        history_shape=[hp.choice("a", [7, 14]), 24],
        learning_rate=hp.loguniform("lr", -13, -4),
        conv_layers_params=[
            dict(
                filter_shape=[hp.choice("b", [1, 3, 5]), hp.choice("c", [3, 5, 7, 9])],
                num_filters=hp.choice("d", [4, 8, 16, 32, 64]),
            ),
        ],
        fc_layers_params=[
            dict(num_outpus=64, activation="relu"),
            dict(),
        ],
    )

    # all ok
    optimizer = STLQFCNNHyperparameterOptimizer(
        db=db,
        time_horizon="time_horizon",
        quantile_levels="quantile_levels",
        max_evals=37,
        max_chunk_size=10,
        space=space,
    )
    fail_predicate = lambda: False
    expected_trials = [
        (10, 10),
        (20, 20),
        (30, 30),
        (37, 37),
    ]
    f = io.StringIO()
    with redirect_stdout(f):
        for i, (num_trials, num_ok_trials, best_trial) in enumerate(optimizer.optimize()):
            assert (num_trials, num_ok_trials) == expected_trials[i]

    # all fail
    optimizer = STLQFCNNHyperparameterOptimizer(
        db=db,
        time_horizon="time_horizon",
        quantile_levels="quantile_levels",
        max_evals=37,
        max_chunk_size=10,
        space=space,
    )
    fail_predicate = lambda: True
    expected_trials = [
        (10, 0),
        (20, 0),
        (30, 0),
        (37, 0),
    ]
    f = io.StringIO()
    with redirect_stdout(f):
        for i, (num_trials, num_ok_trials, best_trial) in enumerate(optimizer.optimize()):
            assert (num_trials, num_ok_trials) == expected_trials[i]

    # 1/3 fail, stop by overall_max_evals
    optimizer = STLQFCNNHyperparameterOptimizer(
        db=db,
        time_horizon="time_horizon",
        quantile_levels="quantile_levels",
        max_evals=37,
        overall_max_evals_factor=1,
        max_chunk_size=10,
        space=space,
    )
    fail_predicate = lambda: len(optimizer.trials) % 3 == 1  # trails contain unfinished tasks
    expected_trials = [
        (10, 6),
        (20, 13),
        (30, 20),
        (37, 24),
    ]
    f = io.StringIO()
    with redirect_stdout(f):
        for i, (num_trials, num_ok_trials, best_trial) in enumerate(optimizer.optimize()):
            assert (num_trials, num_ok_trials) == expected_trials[i]

    # 1/3 fail, stop by max_evals
    optimizer = STLQFCNNHyperparameterOptimizer(
        db=db,
        time_horizon="time_horizon",
        quantile_levels="quantile_levels",
        max_evals=37,
        overall_max_evals_factor=2,
        max_chunk_size=10,
        space=space,
    )
    fail_predicate = lambda: len(optimizer.trials) % 3 == 1
    expected_trials = [
        (10, 6),
        (20, 13),
        (30, 20),
        (40, 26),
        (50, 33),
        (56, 37),
    ]
    f = io.StringIO()
    with redirect_stdout(f):
        for i, (num_trials, num_ok_trials, best_trial) in enumerate(optimizer.optimize()):
            assert (num_trials, num_ok_trials) == expected_trials[i]