Example #1
def test_set_hp():
    input_node = ak.Input(shape=(32, ))
    output_node = input_node
    output_node = ak.DenseBlock()(output_node)
    head = ak.RegressionHead()
    head.output_shape = (1, )
    output_node = head(output_node)

    graph = graph_module.Graph(
        inputs=input_node,
        outputs=output_node,
        override_hps=[
            hp_module.Choice("dense_block_1/num_layers", [6], default=6)
        ],
    )
    hp = kerastuner.HyperParameters()
    graph.build(hp)

    for single_hp in hp.space:
        if single_hp.name == "dense_block_1/num_layers":
            assert len(single_hp.values) == 1
            assert single_hp.values[0] == 6
            return
    assert False
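A minimal sketch of the plain keras-tuner API this test exercises (no AutoKeras graph involved; assumes only kerastuner is installed): registering a single-value Choice and walking hp.space.

import kerastuner

hp = kerastuner.HyperParameters()
# Registering a Choice adds it to the search space and returns its value.
hp.Choice("dense_block_1/num_layers", [6], default=6)
for single_hp in hp.space:
    print(single_hp.name, single_hp.values)  # dense_block_1/num_layers [6]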
Example #2
    def _convert_to_kerastuner_hyperparameters(
        self, candidate_hparams: List[Dict[str, Any]]
    ) -> List[kerastuner.HyperParameters]:
        """Convert list of HSpace to a list of search space each with cardinality 1.

    Args:
      candidate_hparams: List of Dict of HParams with same keys.

    Returns:
      The list of hparams in the search space.
    """

        if not candidate_hparams:
            raise ValueError(
                f'Expected a non-empty list of candidate_hparams. Got {candidate_hparams}'
            )

        simple_search_space_list = []
        for candidate_hparam in candidate_hparams:
            simple_search_space = kerastuner.HyperParameters()
            for key in candidate_hparam:
                simple_search_space.Choice(key, [candidate_hparam[key]])
            simple_search_space_list.append(simple_search_space)
        return simple_search_space_list
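A standalone sketch of what the helper above produces (the keys are illustrative, not from the source): each candidate dict becomes a search space in which every hyperparameter is a single-element Choice, so its value is pinned.

import kerastuner

candidate = {'learning_rate': 1e-3, 'num_layers': 2}  # illustrative keys
space = kerastuner.HyperParameters()
for key, value in candidate.items():
    space.Choice(key, [value])
# A cardinality-1 Choice always resolves to its only value.
assert space.get('learning_rate') == 1e-3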
Example #3
def check_initial_hp(initial_hp, graph):
    hp = kerastuner.HyperParameters()
    hp.values = copy.copy(initial_hp)
    graph.build(hp)
    assert hp.values == initial_hp
Example #4
def get_hyperparam() -> kerastuner.HyperParameters:
    hp = kerastuner.HyperParameters()
    hp.Choice(name='learning_rate', values=[1e-2, 1e-3], default=1e-2)
    return hp
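A hedged usage sketch for the helper above: passing the predefined space to a tuner via hyperparameters=, with tune_new_entries=False, restricts the search to the declared learning_rate choice. build_model is an assumed build_model(hp) function, not part of this example.

import kerastuner

tuner = kerastuner.tuners.RandomSearch(
    build_model,                        # assumed: a build_model(hp) function
    objective='val_loss',
    max_trials=2,
    hyperparameters=get_hyperparam(),
    tune_new_entries=False,             # tune only the declared parameters
)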
Example #5
def build_model(hp):
    # NOTE: the original snippet is truncated; the layers that define
    # `model` are missing. The enclosing function name is inferred from
    # the `build_model(hp)` call further below.
    # Defining Optimizer

    # Compiling the model
    Nadam = tf.keras.optimizers.Nadam(learning_rate=0.002,
                                      beta_1=0.9,
                                      beta_2=0.999,
                                      epsilon=1e-07,
                                      name='Nadam')
    # Compile model
    model.compile(loss='mse', optimizer=Nadam, metrics=['mape'])

    return model


hp = kt.HyperParameters()
model = build_model(hp)

tuner = Hyperband(build_model,
                  objective=kt.Objective('val_mape', direction='min'),
                  max_epochs=2000,
                  hyperband_iterations=2,
                  directory='HyperBandTrials',
                  project_name='PressureOpti_hl')

# Defining the Early Stopping Function
early_stopping_callback = EarlyStopping(monitor='val_mape',
                                        patience=500,
                                        min_delta=1e-4,
                                        restore_best_weights=True,
                                        mode='auto')
Example #6
def test_time_series_input_node_build_no_error():
    node = nodes.TimeseriesInput(lookback=2, shape=(32,))
    hp = kerastuner.HyperParameters()

    input_node = node.build_node(hp)
    node.build(hp, input_node)
Example #7
def main():
    base_folder, return_list, test, timer, train, val = init_test()
    prefix = ''
    test_name = 'best_model_comparison2'
    print('Running best model comparison')

    csv_file = base_folder / 'temp' / f'{prefix}{test_name}.csv'

    for repetition in range(7):
        config = Config()
        configuration_name = ModelType.BASELINE
        config.training.model_type = 'baseline'
        config.training.cnn_repetition = 0
        config.training.lstm_repetition = 1
        config.training.dense_repetition = 0
        config.training.model_size = 384
        config.training.dropout = 0
        config.training.initial_learning_rate = 0.001
        config.training.batch_size = 128
        config.training.label_smoothing = 0
        config.training.mixup_alpha = 0
        config.training.l2_regularization = 0
        config.training.x_groups = [
            ['prev_word_id'],
        ]
        config.training.y_groups = [
            ['word_id'],
        ]
        hp = None

        eval_config(csv_file, timer, return_list, train, val, test, config,
                    test_name, configuration_name, hp)

        config = Config()
        configuration_name = ModelType.DDC
        config.training.model_type = 'ddc'
        config.training.cnn_repetition = 0
        config.training.lstm_repetition = 2
        config.training.dense_repetition = 0
        config.training.model_size = 512
        config.training.dropout = 0.5
        config.training.initial_learning_rate = 0.001
        config.training.batch_size = 64
        config.training.label_smoothing = 0
        config.training.mixup_alpha = 0
        config.training.l2_regularization = 0
        config.training.x_groups = [['prev_word_id'], [
            'prev',
            'next',
        ]]
        config.training.y_groups = [
            ['word_id'],
        ]
        hp = None

        eval_config(csv_file, timer, return_list, train, val, test, config,
                    test_name, configuration_name, hp)

        config = Config()
        configuration_name = 'Custom vec+id:id'
        config.training.model_type = ModelType.CUSTOM
        config.training.cnn_repetition = 2
        config.training.lstm_repetition = 2
        config.training.dense_repetition = 2
        config.training.model_size = 512
        config.training.dropout = 0.4
        config.training.initial_learning_rate = 1e-2
        config.training.batch_size = 128
        config.training.label_smoothing = 0.5
        config.training.mixup_alpha = 0.5
        config.training.l2_regularization = 0
        config.training.x_groups = [['prev_word_id', 'prev_word_vec'],
                                    DatasetConfig().categorical,
                                    DatasetConfig().audio,
                                    DatasetConfig().regression]
        config.training.y_groups = [
            ['word_id'],
        ]
        hp = None

        eval_config(csv_file, timer, return_list, train, val, test, config,
                    test_name, configuration_name, hp)

        config = Config()
        configuration_name = 'Custom vec+id:vec'
        config.training.model_type = ModelType.CUSTOM
        config.training.cnn_repetition = 2
        config.training.lstm_repetition = 2
        config.training.dense_repetition = 0
        config.training.model_size = 512
        config.training.dropout = 0.4
        config.training.initial_learning_rate = 1e-2
        config.training.batch_size = 128
        config.training.label_smoothing = 0.5
        config.training.mixup_alpha = 0.5
        config.training.l2_regularization = 0
        config.training.x_groups = [['prev_word_id', 'prev_word_vec'],
                                    DatasetConfig().categorical,
                                    DatasetConfig().audio,
                                    DatasetConfig().regression]
        config.training.y_groups = [
            ['word_vec'],
        ]
        hp = None

        eval_config(csv_file, timer, return_list, train, val, test, config,
                    test_name, configuration_name, hp)

        config = Config()
        configuration_name = 'MLSTM'
        config.training.model_type = ModelType.TUNE_MLSTM
        config.training.batch_size = 128
        config.training.label_smoothing = 0.5
        config.training.mixup_alpha = 0.5
        config.training.x_groups = [['prev_word_id', 'prev_word_vec'],
                                    DatasetConfig().categorical,
                                    DatasetConfig().audio,
                                    DatasetConfig().regression]
        config.training.y_groups = [
            ['word_id'],
        ]
        hp = kt.HyperParameters()
        fixed_params = {
            'connections_0': 2,
            'connections_1': 2,
            'connections_2': 2,
            'connections_3': 3,
            'connections_4': 1,
            'connections_5': 3,
            'connections_6': 2,
            'depth_0': 18,
            'depth_1': 23,
            'depth_2': 43,
            'depth_3': 13,
            'depth_4': 52,
            'depth_5': 5,
            'depth_6': 11,
            'dropout_0': 0.25612932926324405,
            'dropout_1': 0.1620424523625309,
            'dropout_2': 0.4720468723284278,
            'dropout_3': 0.43881829788147036,
            'dropout_4': 0.44741780640383355,
            'dropout_5': 0.3327191857714107,
            'dropout_6': 0.1367707920005909,
            'initial_learning_rate': 0.008,
            'label_smoothing': 0.13716631669361445,
            'lstm_layers': 3,
            'width_0': 16,
            'width_1': 9,
            'width_2': 15,
            'width_3': 16,
            'width_4': 5,
            'width_5': 11,
            'width_6': 4,
        }
        for param, value in fixed_params.items():
            hp.Fixed(param, value=value)

        eval_config(csv_file, timer, return_list, train, val, test, config,
                    test_name, configuration_name, hp)

Example #8
def test_hyperband_oracle_one_sweep_parallel(tmp_dir):
    hp = kt.HyperParameters()
    hp.Float('a', -100, 100)
    hp.Float('b', -100, 100)
    oracle = hyperband_module.HyperbandOracle(hyperparameters=hp,
                                              objective='score',
                                              hyperband_iterations=1,
                                              max_epochs=4,
                                              factor=2)
    oracle._set_project_dir(tmp_dir, 'untitled')

    # All round 0 trials from different brackets can be run
    # in parallel.
    round0_trials = []
    for i in range(10):
        t = oracle.create_trial('tuner' + str(i))
        assert t.status == 'RUNNING'
        round0_trials.append(t)

    assert len(oracle._brackets) == 3

    # Round 1 can't be run until enough models from round 0
    # have completed.
    t = oracle.create_trial('tuner10')
    assert t.status == 'IDLE'

    for t in round0_trials:
        oracle.update_trial(t.trial_id, {'score': 1})
        oracle.end_trial(t.trial_id, 'COMPLETED')

    round1_trials = []
    for i in range(4):
        t = oracle.create_trial('tuner' + str(i))
        assert t.status == 'RUNNING'
        round1_trials.append(t)

    # Bracket 0 is complete as it only has round 0.
    assert len(oracle._brackets) == 2

    # Round 2 can't be run until enough models from round 1
    # have completed.
    t = oracle.create_trial('tuner10')
    assert t.status == 'IDLE'

    for t in round1_trials:
        oracle.update_trial(t.trial_id, {'score': 1})
        oracle.end_trial(t.trial_id, 'COMPLETED')

    # Only one trial runs in round 2.
    round2_trial = oracle.create_trial('tuner0')

    assert len(oracle._brackets) == 1

    # No more trials to run, but wait for existing brackets to end.
    t = oracle.create_trial('tuner10')
    assert t.status == 'IDLE'

    oracle.update_trial(round2_trial.trial_id, {'score': 1})
    oracle.end_trial(round2_trial.trial_id, 'COMPLETED')

    t = oracle.create_trial('tuner10')
    assert t.status == 'STOPPED', oracle._current_sweep
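A worked check of the bracket count asserted above, assuming the standard Hyperband formula: the oracle builds s_max + 1 brackets, where s_max = floor(log(max_epochs) / log(factor)).

import math

max_epochs, factor = 4, 2
s_max = math.floor(math.log(max_epochs) / math.log(factor))  # 2
assert s_max + 1 == 3  # matches the `len(oracle._brackets) == 3` assertion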
Example #9
    def fit(self,
            x=None,
            y=None,
            epochs=None,
            callbacks=None,
            validation_split=0.2,
            validation_data=None,
            **kwargs):
        """Search for the best model and hyperparameters for the AutoModel.

        It will search for the best model based on the performances on
        validation data.

        # Arguments
            x: numpy.ndarray or tensorflow.Dataset. Training data x.
            y: numpy.ndarray or tensorflow.Dataset. Training data y.
            epochs: Int. The number of epochs to train each model during the search.
                If unspecified, by default we train for a maximum of 1000 epochs,
                but we stop training if the validation loss stops improving for 10
                epochs (unless you specified an EarlyStopping callback as part of
                the callbacks argument, in which case the EarlyStopping callback you
                specified will determine early stopping).
            callbacks: List of Keras callbacks to apply during training and
                validation.
            validation_split: Float between 0 and 1. Defaults to 0.2.
                Fraction of the training data to be used as validation data.
                The model will set apart this fraction of the training data,
                will not train on it, and will evaluate
                the loss and any model metrics
                on this data at the end of each epoch.
                The validation data is selected from the last samples
                in the `x` and `y` data provided, before shuffling. This argument is
                not supported when `x` is a dataset.
                The best model found would be fit on the entire dataset including the
                validation data.
            validation_data: Data on which to evaluate the loss and any model metrics
                at the end of each epoch. The model will not be trained on this data.
                `validation_data` will override `validation_split`. The type of the
                validation data should be the same as the training data.
                The best model found would be fit on the training dataset without the
                validation data.
            **kwargs: Any arguments supported by keras.Model.fit.
        """
        dataset, validation_data = self._prepare_data(
            x=x,
            y=y,
            validation_data=validation_data,
            validation_split=validation_split)

        # Initialize the hyper_graph.
        self._meta_build(dataset)

        # Initialize the Tuner.
        # The hypermodel needs input_shape, which can only be known after
        # preprocessing. So we preprocess the dataset once to get the input_shape,
        # so that the hypermodel can be built in the initializer of the Tuner, which
        # does not access the dataset.
        hp = kerastuner.HyperParameters()
        preprocess_graph, keras_graph = self.hyper_graph.build_graphs(hp)
        preprocess_graph.preprocess(dataset=dataset,
                                    validation_data=validation_data,
                                    fit=True)
        self.tuner = self.tuner(hyper_graph=self.hyper_graph,
                                hypermodel=keras_graph,
                                fit_on_val_data=self._split_dataset,
                                overwrite=self.overwrite,
                                objective=self.objective,
                                max_trials=self.max_trials,
                                directory=self.directory,
                                seed=self.seed,
                                project_name=self.name)

        # Process the args.
        if callbacks is None:
            callbacks = []
        if epochs is None:
            epochs = 1000
            if not any([
                    isinstance(callback, tf.keras.callbacks.EarlyStopping)
                    for callback in callbacks
            ]):
                callbacks = callbacks + [
                    tf.keras.callbacks.EarlyStopping(patience=10)
                ]

        self.tuner.search(x=dataset,
                          epochs=epochs,
                          callbacks=callbacks,
                          validation_data=validation_data,
                          **kwargs)
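A hedged usage sketch for the fit() above. auto_model, x_train and y_train are assumptions (an AutoKeras AutoModel plus numpy training data), not defined in this example.

# auto_model, x_train, y_train: assumed from surrounding context.
auto_model.fit(
    x=x_train,
    y=y_train,
    validation_split=0.2,
    # epochs omitted: trains up to 1000 epochs with EarlyStopping(patience=10)
)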
Example #10
def test_greedy_oracle_get_state_update_space_can_run():
    oracle = greedy.GreedyOracle(objective="val_loss")
    oracle.set_state(oracle.get_state())
    hp = kerastuner.HyperParameters()
    hp.Boolean("test")
    oracle.update_space(hp)
Example #11
def test_lightgbm_classifier_block():
    input_node = ak.Input()
    hp = kerastuner.HyperParameters()
    lgbm_classifier = ak.LightGBMClassifierBlock()
    output_node = lgbm_classifier.build(hp=hp, inputs=input_node)
    assert isinstance(output_node[0], node.Node)
Example #12
def main():
    base_folder, return_list, test, timer, train, val = init_test()

    seed = 43  # random, non-fine tuned seed
    tf.random.set_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    config = Config()

    config.dataset.storage_folder = base_folder / 'new_datasets'
    config.audio_processing.use_cache = True

    model_path = base_folder / 'temp'
    model_path.mkdir(parents=True, exist_ok=True)

    # The best found temperature is ~0.91
    configuration_name = 'MLSTM'
    config.training.model_type = ModelType.TUNE_MLSTM
    config.training.batch_size = 128
    config.training.label_smoothing = 0.5
    config.training.mixup_alpha = 0.5
    config.training.x_groups = [['prev_word_id', 'prev_word_vec'],
                                DatasetConfig().categorical,
                                DatasetConfig().audio,
                                DatasetConfig().regression]
    config.training.y_groups = [
        ['word_id'],
    ]
    hp = kt.HyperParameters()
    fixed_params = {
        'connections_0': 2,
        'connections_1': 2,
        'connections_2': 2,
        'connections_3': 3,
        'connections_4': 1,
        'connections_5': 3,
        'connections_6': 2,
        'depth_0': 18,
        'depth_1': 23,
        'depth_2': 43,
        'depth_3': 13,
        'depth_4': 52,
        'depth_5': 5,
        'depth_6': 11,
        'dropout_0': 0.25612932926324405,
        'dropout_1': 0.1620424523625309,
        'dropout_2': 0.4720468723284278,
        'dropout_3': 0.43881829788147036,
        'dropout_4': 0.44741780640383355,
        'dropout_5': 0.3327191857714107,
        'dropout_6': 0.1367707920005909,
        'initial_learning_rate': 0.008,
        'label_smoothing': 0.13716631669361445,
        'lstm_layers': 3,
        'width_0': 16,
        'width_1': 9,
        'width_2': 15,
        'width_3': 16,
        'width_4': 5,
        'width_5': 11,
        'width_6': 4,
    }
    for param, value in fixed_params.items():
        hp.Fixed(param, value=value)
    find_temperature_and_generate(base_folder, train, val, test, model_path,
                                  configuration_name, deepcopy(config), hp)

    # The best found temperature is ~0.71
    configuration_name = 'vec:id'
    config.training.model_type = ModelType.CUSTOM
    config.training.cnn_repetition = 2
    config.training.lstm_repetition = 2
    config.training.dense_repetition = 0
    config.training.model_size = 512
    config.training.dropout = 0.4
    config.training.initial_learning_rate = 1e-2
    config.training.batch_size = 128
    config.training.label_smoothing = 0.5
    config.training.mixup_alpha = 0.5
    config.training.l2_regularization = 0
    config.training.x_groups = [[
        'prev_word_id',
    ],
                                DatasetConfig().categorical,
                                DatasetConfig().audio,
                                DatasetConfig().regression]
    config.training.y_groups = [
        ['word_id'],
    ]
    find_temperature_and_generate(base_folder, train, val, test, model_path,
                                  configuration_name, deepcopy(config))

    # The best found temperature is ~0.147
    configuration_name = 'vec:vec'
    config.training.model_type = ModelType.CUSTOM
    config.training.cnn_repetition = 2
    config.training.lstm_repetition = 2
    config.training.dense_repetition = 0
    config.training.model_size = 512
    config.training.dropout = 0.4
    config.training.initial_learning_rate = 1e-2
    config.training.batch_size = 128
    config.training.label_smoothing = 0.5
    config.training.mixup_alpha = 0.5
    config.training.l2_regularization = 0
    config.training.x_groups = [[
        'prev_word_vec',
    ],
                                DatasetConfig().categorical,
                                DatasetConfig().audio,
                                DatasetConfig().regression]
    config.training.y_groups = [
        ['word_vec'],
    ]
    find_temperature_and_generate(base_folder, train, val, test, model_path,
                                  configuration_name, deepcopy(config))
Example #13
def _get_hyperparameters() -> kerastuner.HyperParameters:
    hp = kerastuner.HyperParameters()
    hp.Choice(H_SIZE, [5, 10])
    return hp
Example #14
def main():
    args = get_args()
    folder = args["inp-folder"]
    del args["inp-folder"]

    loc = os.path.dirname(sys.argv[0])
    data_loc = os.path.join(loc, "Data")
    log_dir = os.path.join(loc, "logs")
    log_search_dir = os.path.join(
        log_dir,
        datetime.datetime.now().strftime("%m%d-%H%M"))

    if not os.path.exists(log_dir):
        os.mkdir(log_dir)

    if not os.path.exists(log_search_dir):
        os.mkdir(log_search_dir)

    x_tr, x_te, y_tr, y_te, i_ref, o_ref = load_inp_folder(
        os.path.join(data_loc, folder))

    print(i_ref)
    print(o_ref)

    start = time.time()

    hp = kt.HyperParameters()
    if len(y_tr.shape) > 1:
        hp.Fixed("out", y_tr.shape[1])
    else:
        hp.Fixed("out", 1)
    hp.Fixed("norm", False)
    hp.Fixed("drop_out", 0.0)

    num_trials = 100

    tuner = kt.tuners.BayesianOptimization(
        hp_estimator,
        "val_mae",
        num_trials,
        num_initial_points=10,
        directory=log_search_dir,
        hyperparameters=hp,
        distribution_strategy=tf.distribute.MirroredStrategy(),
        project_name="XFEL-ANN")

    rand_tuner = kt.tuners.RandomSearch(
        hp_estimator,
        "val_mae",
        num_trials,
        directory=log_search_dir,
        hyperparameters=hp,
        distribution_strategy=tf.distribute.MirroredStrategy(),
        project_name="XFEL-ANN")

    tuner.search_space_summary()  # prints directly; returns None

    x_tr_v, x_val, y_tr_v, y_val = train_test_split(x_tr,
                                                    y_tr)  # get validation set

    tuner.search(x_tr_v,
                 y_tr_v,
                 epochs=3000,
                 verbose=2,
                 validation_data=(x_val, y_val),
                 batch_size=1000,
                 callbacks=[
                     tf.keras.callbacks.EarlyStopping(monitor='val_mae',
                                                      patience=3),
                     tf.keras.callbacks.TensorBoard('history')
                 ])

    rand_tuner.search(x_tr_v,
                      y_tr_v,
                      epochs=3000,
                      verbose=2,
                      validation_data=(x_val, y_val),
                      batch_size=1000,
                      callbacks=[
                          tf.keras.callbacks.EarlyStopping(monitor='val_mae',
                                                           patience=3),
                          tf.keras.callbacks.TensorBoard('history')
                      ])

    dur = time.time() - start
    print(f"Runtime: {dur}s")
    trials = tuner.oracle.get_best_trials(num_trials=num_trials)
    rand_trials = rand_tuner.oracle.get_best_trials(num_trials=num_trials)

    tr_score = [trial.score for trial in trials][::-1]
    ra_score = [trial.score for trial in rand_trials][::-1]

    plt.style.use(args["style-sheet"])

    plt.figure(figsize=(7, 7))

    plt.plot(tr_score, label="Bayesian Optimisation")
    plt.plot(ra_score, label="Random Search")

    plt.legend()

    label = time.time()

    plt.savefig("hp_" + args["inp-folder"] + "_" + str(label))

    if args["refit-best"]:
        best_model = tuner.get_best_models()[0]
        best_model.fit(x_tr, y_tr, batch_size=1000, epochs=300)
        print(best_model.evaluate(x_te, y_te))
Example #15
    def get_tunable_hyper_parameters(self) -> kt.HyperParameters:
        hp = kt.HyperParameters()
        self.build(hp)
        return hp
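Building against a fresh HyperParameters object registers every hp.Choice/hp.Int/hp.Fixed call made inside build(), so the returned object enumerates the full tunable space. A hedged sketch, assuming classifier is a HyperModel such as the AuthorClassifier used later in hyper_parameter_search:

tunable = classifier.get_tunable_hyper_parameters()  # classifier: assumed HyperModel
print(list(tunable.values.keys()))  # names of every tunable parameter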
Example #16
def test_hyperband_oracle_one_sweep_parallel(tmp_dir):
    hp = kt.HyperParameters()
    hp.Float("a", -100, 100)
    hp.Float("b", -100, 100)
    oracle = hyperband_module.HyperbandOracle(
        hyperparameters=hp,
        objective=kt.Objective("score", "max"),
        hyperband_iterations=1,
        max_epochs=4,
        factor=2,
    )
    oracle._set_project_dir(tmp_dir, "untitled")

    # All round 0 trials from different brackets can be run
    # in parallel.
    round0_trials = []
    for i in range(10):
        t = oracle.create_trial("tuner" + str(i))
        assert t.status == "RUNNING"
        round0_trials.append(t)

    assert len(oracle._brackets) == 3

    # Round 1 can't be run until enough models from round 0
    # have completed.
    t = oracle.create_trial("tuner10")
    assert t.status == "IDLE"

    for t in round0_trials:
        oracle.update_trial(t.trial_id, {"score": 1})
        oracle.end_trial(t.trial_id, "COMPLETED")

    round1_trials = []
    for i in range(4):
        t = oracle.create_trial("tuner" + str(i))
        assert t.status == "RUNNING"
        round1_trials.append(t)

    # Bracket 0 is complete as it only has round 0.
    assert len(oracle._brackets) == 2

    # Round 2 can't be run until enough models from round 1
    # have completed.
    t = oracle.create_trial("tuner10")
    assert t.status == "IDLE"

    for t in round1_trials:
        oracle.update_trial(t.trial_id, {"score": 1})
        oracle.end_trial(t.trial_id, "COMPLETED")

    # Only one trial runs in round 2.
    round2_trial = oracle.create_trial("tuner0")

    assert len(oracle._brackets) == 1

    # No more trials to run, but wait for existing brackets to end.
    t = oracle.create_trial("tuner10")
    assert t.status == "IDLE"

    oracle.update_trial(round2_trial.trial_id, {"score": 1})
    oracle.end_trial(round2_trial.trial_id, "COMPLETED")

    t = oracle.create_trial("tuner10")
    assert t.status == "STOPPED", oracle._current_sweep
Example #17
def main():
    base_folder, return_list, test, timer, train, val = init_test()
    prefix = 'hyper_'
    config = Config()
    model_path = base_folder / 'temp'

    find_model = True
    train_model = True
    eval_model = True
    if find_model:
        for model_type in [ModelType.TUNE_CLSTM, ModelType.TUNE_MLSTM]:
            train_seq = BeatmapSequence(df=train, is_train=True, config=config)
            val_seq = BeatmapSequence(df=val, is_train=False, config=config)
            test_seq = BeatmapSequence(df=test, is_train=False, config=config)

            # To search for a specific input:output combination, change `config`
            config.training.model_type = model_type

            tuner = kt.Hyperband(
                get_architecture_fn(config)(train_seq, False, config),
                objective=kt.Objective('val_avs_dist', direction='min'),
                hyperband_iterations=1,
                max_epochs=100,
                factor=4,
                directory=base_folder / 'temp' / 'hyper_search',
                project_name=f'{get_architecture_fn(config).__qualname__}',
                overwrite=False,  # CAUTION!
            )
            tuner.search_space_summary()

            callbacks = create_callbacks(train_seq, config)

            tuner.search(
                x=train_seq,
                validation_data=val_seq,
                callbacks=callbacks,
                epochs=60,
                verbose=2,
                workers=10,
                max_queue_size=16,
                use_multiprocessing=False,
            )

            tuner.results_summary()  # prints directly; print() around it would show "None"
            tuner.get_best_models(2)[0].summary()
            print(tuner.get_best_models(2)[0].evaluate(test_seq))

    if train_model:
        # Train specific hyperparameters
        hp = kt.HyperParameters()
        fixed_params = {
            'connections_0': 2,
            'connections_1': 2,
            'connections_2': 2,
            'connections_3': 3,
            'connections_4': 1,
            'connections_5': 3,
            'connections_6': 2,
            'depth_0': 18,
            'depth_1': 23,
            'depth_2': 43,
            'depth_3': 13,
            'depth_4': 52,
            'depth_5': 5,
            'depth_6': 11,
            'dropout_0': 0.25612932926324405,
            'dropout_1': 0.1620424523625309,
            'dropout_2': 0.4720468723284278,
            'dropout_3': 0.43881829788147036,
            'dropout_4': 0.44741780640383355,
            'dropout_5': 0.3327191857714107,
            'dropout_6': 0.1367707920005909,
            'initial_learning_rate': 0.008,
            'label_smoothing': 0.13716631669361445,
            'lstm_layers': 3,
            'width_0': 16,
            'width_1': 9,
            'width_2': 15,
            'width_3': 16,
            'width_4': 5,
            'width_5': 11,
            'width_6': 4,
        }
        for param, val in fixed_params.items():
            hp.Fixed(param, value=val)

        model = get_architecture_fn(config)(train_seq, False, config)(hp)
        model.summary()
        tf.keras.utils.plot_model(model,
                                  to_file=base_folder / 'temp' /
                                  'model_architecture.png',
                                  show_shapes=True)
        model.fit(
            x=train_seq,
            validation_data=val_seq,
            callbacks=callbacks,
            epochs=81,
            verbose=2,
            workers=10,
            max_queue_size=16,
            use_multiprocessing=False,
        )

        model_path.mkdir(parents=True, exist_ok=True)

        save_model(model, model_path, train_seq, config, hp=hp)
        timer('Saved model', 5)

    if eval_model:
        stateful_model = tf.keras.models.load_model(
            model_path / 'stateful_model.keras',
            custom_objects={'Perplexity': Perplexity})

        timer('Loaded stateful model', 5)

        input_folder = base_folder / 'human_beatmaps' / 'new_dataformat'
        output_folder = base_folder / 'testing' / 'generated_songs'
        song_codes_to_gen = list(
            x for x in test.index.to_frame()["name"].unique()[:5])
        song_codes_to_gen = [
            '133b',
        ]
        print(song_codes_to_gen)
        for song_code in song_codes_to_gen:
            beatmap_folder = input_folder / song_code
            print(beatmap_folder)
            generate_complete_beatmaps(beatmap_folder, output_folder,
                                       stateful_model, config)
            timer('Generated beatmaps', 5)
Example #18
import tensorflow as tf

#tf.keras.backend.set_floatx('float64')

# settings related to dataset
data_name = 'Pendulum'
folder_name = 'exp2_best'
num_data_train_files = 6
num_initial_conditions = 5000  # per training file

# settings related to system
num_steps = 51
input_dim = 2
delta_t = 0.02

hp = kerastuner.HyperParameters()

# settings related to training
epochs = 15
hp.Fixed('learning_rate', 10.0**(-3))
batch_size = 128

# settings related to network architecture
hp.Fixed("omega_num_complex_pairs", 1)
hp.Fixed("omega_num_real", 0)

hp.Fixed("enc_dec_num_layers", 2)
hp.Fixed('enc_dec_layer_width', 80)

hp.Fixed("omega_num_layers", 1)
hp.Fixed('omega_layer_width', 170)
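A hedged sketch of how a network builder might consume the settings above: hp.get returns the pinned value of a Fixed hyperparameter, so downstream code can read configuration and tuned values through one interface.

# Assumed consumer of the module-level `hp` defined above.
num_complex_pairs = hp.get('omega_num_complex_pairs')  # 1
layer_width = hp.get('enc_dec_layer_width')            # 80
print(num_complex_pairs, layer_width)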
Example #19
def check_initial_hp(initial_hp, graph):
    hp = kerastuner.HyperParameters()
    for i in range(3):
        hp.values = copy.copy(initial_hp)
        graph.build(hp)
    assert len(set(initial_hp.keys()) - set(hp._hps.keys())) == 0
Example #20
def test_static_space_errors(tmp_dir):

    def build_model_static(hp):
        inputs = keras.Input(shape=(INPUT_DIM,))
        x = inputs
        for i in range(hp.get('num_layers')):
            x = keras.layers.Dense(
                units=hp.get('units_' + str(i)),
                activation='relu')(x)
        outputs = keras.layers.Dense(NUM_CLASSES, activation='softmax')(x)
        model = keras.Model(inputs, outputs)
        model.compile(
            optimizer=keras.optimizers.Adam(
                hp.Float('learning_rate', 1e-5, 1e-2)),
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'])
        return model

    hp = kerastuner.HyperParameters()
    hp.Int('num_layers', 1, 3, 1, default=2)
    hp.Int('units_0', 4, 6, 1, default=5)
    hp.Int('units_1', 4, 6, 1, default=5)

    with pytest.raises(RuntimeError, match='`allow_new_entries` is `False`'):
        tuner = kerastuner.tuners.RandomSearch(
            build_model_static,
            objective='val_accuracy',
            max_trials=2,
            directory=tmp_dir,
            hyperparameters=hp,
            allow_new_entries=False)
        tuner.search(
            x=TRAIN_INPUTS,
            y=TRAIN_TARGETS,
            epochs=2,
            validation_data=(VAL_INPUTS, VAL_TARGETS))

    def build_model_static_invalid(hp):
        inputs = keras.Input(shape=(INPUT_DIM,))
        x = inputs
        for i in range(hp.get('num_layers')):
            x = keras.layers.Dense(
                units=hp.get('units_' + str(i)),
                activation='relu')(x)
        outputs = keras.layers.Dense(NUM_CLASSES, activation='softmax')(x)
        model = keras.Model(inputs, outputs)
        model.compile(
            optimizer=keras.optimizers.Adam(
                hp.Float('learning_rate', 0.001, 0.008, 0.001)),
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'])
        return model

    with pytest.raises(RuntimeError,
                       match='`allow_new_entries` is `False`'):
        tuner = kerastuner.tuners.RandomSearch(
            build_model_static_invalid,
            objective='val_accuracy',
            max_trials=2,
            directory=tmp_dir,
            hyperparameters=hp,
            allow_new_entries=False)
        tuner.search(
            x=TRAIN_INPUTS,
            y=TRAIN_TARGETS,
            epochs=2,
            validation_data=(VAL_INPUTS, VAL_TARGETS))
Example #21
def build_GNNModel(hp=kt.HyperParameters(), metrics=True, loss_balance=1.0):
    '''Build model from a hyperparameter object.'''
    # small AMP (170k parameters)
    #hp.Choice('atom_feature_size', [32, 64, 128, 256], ordered=True, default=128)
    #hp.Choice('edge_feature_size', [1, 2, 3, 64], ordered=True, default=64)
    #hp.Choice('edge_hidden_size', [16, 32, 64, 128, 256], ordered=True, default=64)
    #hp.Int('mp_layers', 1, 6, step=1, default=4)
    #hp.Int('fc_layers', 2, 6, step=1, default=3)
    #hp.Int('edge_fc_layers', 2, 6, step=1, default=3)

    hp.Choice('atom_feature_size', [32, 64, 128, 256],
              ordered=True,
              default=256)
    hp.Choice('edge_feature_size', [1, 2, 3, 8, 64], ordered=True, default=3)
    hp.Choice('edge_hidden_size', [16, 32, 64, 128, 256],
              ordered=True,
              default=128)
    hp.Int('mp_layers', 1, 6, step=1, default=4)
    hp.Int('fc_layers', 2, 6, step=1, default=4)
    hp.Int('edge_fc_layers', 2, 6, step=1, default=4)

    hp.Choice('noise', [0.0, 0.025, 0.05, 0.1], ordered=True, default=0.025)
    hp.Choice('dropout', [True, False], default=True)
    hp.Fixed('rbf_low', 0.005)
    hp.Fixed('rbf_high', 0.20)
    hp.Choice('mp_activation', ['relu', 'softplus', 'tanh'],
              default='softplus')
    hp.Choice('fc_activation', ['relu', 'softplus'], default='softplus')

    # load peak standards
    standards = nmrdata.load_standards()

    model = GNNModel(hp, standards)

    # compile with MSLE (to treat vastly different label mags)
    optimizer = tf.keras.optimizers.Adam(
        hp.Choice('learning_rate', [1e-3, 5e-4, 1e-4, 1e-5], default=1e-4))

    embeddings = nmrdata.load_embeddings()

    #label_idx = type_mask(r'.*\-H.*', embeddings, regex=True)
    label_idx = type_mask(r'.*', embeddings, regex=True)
    corr_loss = NameLoss(label_idx, s=loss_balance)
    loss = corr_loss

    label_idx = type_mask(r'.*\-H.*', embeddings, regex=True)
    h_rmsd = NameRMSD(label_idx, name='h_rmsd')
    label_idx = type_mask(r'.*\-N.*', embeddings, regex=True)
    n_rmsd = NameRMSD(label_idx, name='n_rmsd')
    label_idx = type_mask(r'.*\-C.*', embeddings, regex=True)
    c_rmsd = NameRMSD(label_idx, name='c_rmsd')
    label_idx = type_mask(r'.*\-H$', embeddings, regex=True)
    hn_rmsd = NameRMSD(label_idx, name='hn_rmsd')
    label_idx = type_mask(r'.*\-HA*', embeddings, regex=True)
    ha_rmsd = NameRMSD(label_idx, name='ha_rmsd')
    label_idx = type_mask(r'.*\-H.*', embeddings, regex=True)
    h_r = NameCorr(label_idx, name='h_r')
    label_idx = type_mask(r'.*\-N.*', embeddings, regex=True)
    n_r = NameCorr(label_idx, name='n_r')
    label_idx = type_mask(r'.*\-C.*', embeddings, regex=True)
    c_r = NameCorr(label_idx, name='c_r')
    label_idx = type_mask(r'.*\-H$', embeddings, regex=True)
    hn_r = NameCorr(label_idx, name='hn_r')
    label_idx = type_mask(r'.*\-HA.*', embeddings, regex=True)
    ha_r = NameCorr(label_idx, name='ha_r')
    ha_count = NameCount(label_idx, name='avg_ha_count')

    label_idx = type_mask(r'DFT.*', embeddings, regex=True)
    dft_r = NameCorr(label_idx, name='dft_r')
    dft_count = NameCount(label_idx, name='avg_dft_count')
    label_idx = type_mask(r'MB.*', embeddings, regex=True)
    mb_r = NameCorr(label_idx, name='mb_r')
    mb_count = NameCount(label_idx, name='avg_mb_count')

    model.compile(optimizer=optimizer,
                  loss=loss,
                  metrics=[
                      h_rmsd, n_rmsd, c_rmsd, hn_rmsd, ha_rmsd, h_r, n_r, c_r,
                      hn_r, ha_r, ha_count, mb_r, mb_count, dft_r, dft_count
                  ] if metrics else None)
    return model
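Usage note: because every hyperparameter above declares a default, calling the builder with no arguments constructs the model from those defaults (256 atom features, softplus message-passing activation, and so on).

model = build_GNNModel()  # hp defaults to an empty kt.HyperParameters()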
Example #22
def hyper_parameter_search(rnn_inputs,
                           dense_inputs,
                           targets: np.ndarray,
                           validation_split=0.2,
                           max_epochs=15,
                           selected_hyperparameters=None,
                           search_title=None,
                           model_name="AuthorAttributionModel"):
    """
    Performs a hyper-parameter search on the network, writes it to the file system and returns the resulting tuner
    :param rnn_inputs: dictionary with inputs to RNN (shapes Nx?, N = #posts)
    :param dense_inputs: dictionary with inputs to dense branch (shapes Nx?, N = #posts)
    :param targets: one-hot encoded target users as a numpy array (shape NxM, N = #posts, M = #users)
    :param validation_split: percentage of samples that should be used for validation
    :param max_epochs: maximal epochs run by the tuner
    :param selected_hyperparameters: Only tune for selected parameters,
        or fix all to just get the tuner for building the model
    :param search_title: title of the search, used to write logs to file system
    :param model_name: name of the model
    :return: Keras tuner object
    """

    if search_title is None:
        search_title = "search_" + datetime.datetime.now().strftime(
            "%Y-%m-%d_%H-%M-%S")

    num_train_samples = targets.shape[0]

    inputs, rnn_network_inputs, dense_network_inputs = prepare_inputs(
        rnn_inputs, dense_inputs, targets)
    pathlib.Path("hyperparams/" + search_title).mkdir(parents=True,
                                                      exist_ok=True)

    num_users = targets.shape[1]
    num_dense_inputs = dense_network_inputs.shape[1]
    num_rnn_inputs = rnn_network_inputs.shape[1]
    num_rnn_inputs_dimension = rnn_network_inputs.shape[2]
    log_run_inputs(rnn_inputs, dense_inputs, num_users, num_dense_inputs,
                   num_rnn_inputs, num_rnn_inputs_dimension, num_train_samples,
                   search_title)

    classifier = AuthorClassifier(num_users, num_dense_inputs, num_rnn_inputs,
                                  num_rnn_inputs_dimension, num_train_samples,
                                  search_title, model_name)

    stop_callback = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy',
                                                     patience=3)

    tensorboard_log_dir = "tensorboard_logs/" + search_title
    pathlib.Path(tensorboard_log_dir).mkdir(parents=True, exist_ok=True)
    hist_callback = tf.keras.callbacks.TensorBoard(
        log_dir=tensorboard_log_dir,
        histogram_freq=1,
        write_images=True,
        # embeddings_freq=1,
        write_graph=True)

    if selected_hyperparameters is None:
        tuner = kt.Hyperband(classifier,
                             objective='val_accuracy',
                             max_epochs=max_epochs,
                             factor=3,
                             directory='hyperparams',
                             project_name=search_title)
    else:
        tunable_parameters = classifier.get_tunable_hyper_parameters()
        assert all(parameter in selected_hyperparameters.keys() for parameter in tunable_parameters.values.keys()), \
            "not all parameters needed where provided, need: " + str(tunable_parameters.values.keys())

        hp = kt.HyperParameters()
        for key in selected_hyperparameters.keys():
            if type(selected_hyperparameters[key]) != list:
                hp.Fixed(key, selected_hyperparameters[key])
            else:
                hp.Choice(key, selected_hyperparameters[key])

        tuner = kt.Hyperband(classifier,
                             hyperparameters=hp,
                             objective='val_accuracy',
                             max_epochs=max_epochs,
                             factor=3,
                             directory='hyperparams',
                             project_name=search_title)

    tuner.search(inputs,
                 targets,
                 validation_split=validation_split,
                 callbacks=[hist_callback, stop_callback])
    print("Hyper-parameters search '" + search_title +
          "' completed. Top results:")
    tuner.results_summary(5)
    return tuner
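A hypothetical call of the function above (argument values are illustrative): scalar entries become hp.Fixed and list entries become hp.Choice, so an all-scalar dict pins every parameter and the tuner simply materializes one configuration.

selected = {
    'learning_rate': 1e-3,   # scalar -> hp.Fixed
    'rnn_units': [64, 128],  # list   -> hp.Choice
}
tuner = hyper_parameter_search(rnn_inputs, dense_inputs, targets,
                               selected_hyperparameters=selected)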
Example #23
    def fit(self,
            x=None,
            y=None,
            validation_split=0,
            validation_data=None,
            **kwargs):
        """Search for the best model and hyperparameters for the AutoModel.

        It will search for the best model based on the performances on
        validation data.

        Args:
            x: numpy.ndarray or tensorflow.Dataset. Training data x.
            y: numpy.ndarray or tensorflow.Dataset. Training data y.
            validation_split: Float between 0 and 1.
                Fraction of the training data to be used as validation data.
                The model will set apart this fraction of the training data,
                will not train on it, and will evaluate
                the loss and any model metrics
                on this data at the end of each epoch.
                The validation data is selected from the last samples
                in the `x` and `y` data provided, before shuffling. This argument is
                not supported when `x` is a dataset.
            validation_data: Data on which to evaluate
                the loss and any model metrics at the end of each epoch.
                The model will not be trained on this data.
                `validation_data` will override `validation_split`.
                `validation_data` could be:
                  - tuple `(x_val, y_val)` of Numpy arrays or tensors
                  - tuple `(x_val, y_val, val_sample_weights)` of Numpy arrays
                  - dataset or a dataset iterator
                For the first two cases, `batch_size` must be provided.
                For the last case, `validation_steps` must be provided.
        """
        # Initialize HyperGraph model
        x = nest.flatten(x)
        y = nest.flatten(y)

        y = self._label_encoding(y)
        # TODO: Set the shapes only if they are not provided by the user when
        #  initiating the HyperHead or Block.
        for y_input, output_node in zip(y, self.outputs):
            if len(y_input.shape) == 1:
                y_input = np.reshape(y_input, y_input.shape + (1, ))
            output_node.shape = y_input.shape[1:]
            output_node.in_hypermodels[0].output_shape = output_node.shape

        # Split the data with validation_split
        if (all([isinstance(temp_x, np.ndarray) for temp_x in x])
                and all([isinstance(temp_y, np.ndarray) for temp_y in y])
                and validation_data is None and validation_split):
            (x,
             y), (x_val,
                  y_val) = utils.split_train_to_valid(x, y, validation_split)
            validation_data = x_val, y_val

        # TODO: Handle other types of input, zip dataset, tensor, dict.
        # Prepare the dataset
        x, y, validation_data = utils.prepare_preprocess(x, y, validation_data)

        self.preprocess(hp=kerastuner.HyperParameters(),
                        x=x,
                        y=y,
                        validation_data=validation_data,
                        fit=True)
        self.tuner = tuner.RandomSearch(hypermodel=self,
                                        objective='val_loss',
                                        max_trials=self.max_trials,
                                        directory=self.directory)

        # TODO: allow early stop if epochs is not specified.
        self.tuner.search(x=x, y=y, validation_data=validation_data, **kwargs)
Example #24
        plt.subplot(ploc)
        plt.xlabel(elem[n])
        plt.plot(X_data[:, n],
                 y_true,
                 base[n],
                 alpha=0.3,
                 label=f"NH4 relative to {elem[n]} (observed)")
        plt.plot(X_data[:, n],
                 y_pred,
                 'xk',
                 alpha=0.3,
                 label=f"NH4 relative to {elem[n]} (predicted)")
        plt.legend()
    # plt.savefig("./results.png", format='png')
    plt.show()


if __name__ == '__main__':
    data = cropData(loadData())
    xTest, xTrain, xVal, yTrain, yTest, yVal = splitData(data)
    (trainedModel, xTest, yTest, yPred) = trainModel(data)
    fullPlot(xTest, yTest, yPred)
    hyparams = kt.HyperParameters()
    hmodel = hyper_tune(hyparams, xTrain, yTrain, xVal, yVal)
    hmodel.fit(xTrain, yTrain)
    hyperPreds = hmodel.predict(xTest)
    print("The predictions coming back have shape", hyperPreds.shape)
    heval = hmodel.evaluate(xTest, yTest, batch_size=64)
    print("Tuned mean squared error:", heval)
    hmodel.summary()
    fullPlot(xTest, yTest, hyperPreds)
Example #25
    def fit(self,
            x=None,
            y=None,
            epochs=None,
            callbacks=None,
            validation_split=0,
            validation_data=None,
            **kwargs):
        """Search for the best model and hyperparameters for the AutoModel.

        It will search for the best model based on the performances on
        validation data.

        # Arguments
            x: numpy.ndarray or tensorflow.Dataset. Training data x.
            y: numpy.ndarray or tensorflow.Dataset. Training data y.
            epochs: Int. The number of epochs to train each model during the search.
                If unspecified, by default we train for a maximum of 1000 epochs,
                but we stop training if the validation loss stops improving for 10
                epochs (unless you specified an EarlyStopping callback as part of
                the callbacks argument, in which case the EarlyStopping callback you
                specified will determine early stopping).
            callbacks: List of Keras callbacks to apply during training and
                validation.
            validation_split: Float between 0 and 1.
                Fraction of the training data to be used as validation data.
                The model will set apart this fraction of the training data,
                will not train on it, and will evaluate
                the loss and any model metrics
                on this data at the end of each epoch.
                The validation data is selected from the last samples
                in the `x` and `y` data provided, before shuffling. This argument is
                not supported when `x` is a dataset.
            validation_data: Data on which to evaluate
                the loss and any model metrics at the end of each epoch.
                The model will not be trained on this data.
                `validation_data` will override `validation_split`.
                `validation_data` could be:
                  - tuple `(x_val, y_val)` of Numpy arrays or tensors
                  - tuple `(x_val, y_val, val_sample_weights)` of Numpy arrays
                  - dataset or a dataset iterator
                For the first two cases, `batch_size` must be provided.
                For the last case, `validation_steps` must be provided.
            **kwargs: Any arguments supported by keras.Model.fit.
        """
        dataset, validation_data = self._prepare_data(
            x=x,
            y=y,
            validation_data=validation_data,
            validation_split=validation_split)

        # Initialize the hypermodel.
        self._meta_build(dataset)
        self.hypermodel.set_io_shapes(dataset)

        # Build the hypermodel in tuner init.
        hp = kerastuner.HyperParameters()
        self.hypermodel.hyper_build(hp)
        self.hypermodel.preprocess(hp=kerastuner.HyperParameters(),
                                   dataset=dataset,
                                   validation_data=validation_data,
                                   fit=True)
        self.tuner = tuner.RandomSearch(hypermodel=self.hypermodel,
                                        objective='val_loss',
                                        max_trials=self.max_trials,
                                        directory=self.directory,
                                        seed=self.seed,
                                        project_name=self.name)
        self.hypermodel.clear_preprocessors()

        # Process the args.
        if callbacks is None:
            callbacks = []
        if epochs is None:
            epochs = 1000
            if not any([
                    isinstance(callback, tf.keras.callbacks.EarlyStopping)
                    for callback in callbacks
            ]):
                callbacks = callbacks + [
                    tf.keras.callbacks.EarlyStopping(patience=10)
                ]

        self.tuner.search(x=dataset,
                          epochs=epochs,
                          callbacks=callbacks,
                          validation_data=validation_data,
                          **kwargs)
Example #26
def test_time_series_input_node_deserialize_build_to_tensor():
    node = ak.TimeseriesInput(shape=(32, ), lookback=2)
    node = nodes.deserialize(nodes.serialize(node))
    node.shape = (32, )
    output = node.build(kerastuner.HyperParameters())
    assert isinstance(output, tf.Tensor)
Example #27
    def fit(self,
            x=None,
            y=None,
            validation_split=0,
            validation_data=None,
            **kwargs):
        """Search for the best model and hyperparameters for the AutoModel.

        It will search for the best model based on the performances on
        validation data.

        # Arguments
            x: numpy.ndarray or tensorflow.Dataset. Training data x.
            y: numpy.ndarray or tensorflow.Dataset. Training data y.
            validation_split: Float between 0 and 1.
                Fraction of the training data to be used as validation data.
                The model will set apart this fraction of the training data,
                will not train on it, and will evaluate
                the loss and any model metrics
                on this data at the end of each epoch.
                The validation data is selected from the last samples
                in the `x` and `y` data provided, before shuffling. This argument is
                not supported when `x` is a dataset.
            validation_data: Data on which to evaluate
                the loss and any model metrics at the end of each epoch.
                The model will not be trained on this data.
                `validation_data` will override `validation_split`.
                `validation_data` could be:
                  - tuple `(x_val, y_val)` of Numpy arrays or tensors
                  - tuple `(x_val, y_val, val_sample_weights)` of Numpy arrays
                  - dataset or a dataset iterator
                For the first two cases, `batch_size` must be provided.
                For the last case, `validation_steps` must be provided.
            **kwargs: Any arguments supported by keras.Model.fit.
        """
        dataset, validation_data = self.prepare_data(
            x=x,
            y=y,
            validation_data=validation_data,
            validation_split=validation_split)
        self._meta_build(dataset)
        self.hypermodel.set_io_shapes(dataset)
        hp = kerastuner.HyperParameters()
        self.hypermodel.hyper_build(hp)
        self.hypermodel.preprocess(
            hp=kerastuner.HyperParameters(),
            dataset=dataset,
            validation_data=validation_data,
            fit=True)
        self.tuner = tuner.RandomSearch(
            hypermodel=self.hypermodel,
            objective='val_loss',
            max_trials=self.max_trials,
            directory=self.directory,
            seed=self.seed,
            project_name=self.name)

        # TODO: allow early stop if epochs is not specified.
        self.tuner.search(x=dataset,
                          validation_data=validation_data,
                          **kwargs)