def _build_keras_model(hparams: kerastuner.HyperParameters) -> tf.keras.Model:
    """Creates a DNN Keras model for classifying iris data.

    Args:
        hparams: Holds HyperParameters for tuning.

    Returns:
        A Keras Model.
    """
    # The model below is built with Functional API, please refer to
    # https://www.tensorflow.org/guide/keras/overview for all API options.
    inputs = [keras.layers.Input(shape=(1,), name=f) for f in _FEATURE_KEYS]
    d = keras.layers.concatenate(inputs)
    for _ in range(int(hparams.get('num_layers'))):
        d = keras.layers.Dense(8, activation='relu')(d)
    outputs = keras.layers.Dense(3, activation='softmax')(d)
    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        optimizer=keras.optimizers.Adam(hparams.get('learning_rate')),
        loss='sparse_categorical_crossentropy',
        metrics=[keras.metrics.SparseCategoricalAccuracy()])
    model.summary(print_fn=absl.logging.info)
    return model
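# --- Usage sketch (not part of the original file) ---
# A minimal, hedged example of driving the builder above with keras-tuner's
# RandomSearch. The search-space names ('num_layers', 'learning_rate') match
# the hparams.get() calls; the datasets and _FEATURE_KEYS are assumed.
import kerastuner


def _tune_iris_model(train_dataset, eval_dataset):
    hp = kerastuner.HyperParameters()
    hp.Int('num_layers', 1, 4)
    hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])
    tuner = kerastuner.RandomSearch(
        _build_keras_model,
        hyperparameters=hp,
        objective='val_sparse_categorical_accuracy',
        max_trials=5,
        directory='/tmp/iris_tuning',
        project_name='iris')
    tuner.search(train_dataset, validation_data=eval_dataset, epochs=10)
    return tuner.get_best_hyperparameters(num_trials=1)[0]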
def build_task_towers(hp: HyperParameters,
                      n_tasks: int,
                      min_layers: int = 1,
                      max_layers: int = 5,
                      min_units_per_layer: int = 8,
                      max_units_per_layer: int = 40):
    """Helper method to build task-specific networks."""
    task_towers = []
    n_layers = hp.Int(name="n_layers_tasks",
                      min_value=min_layers,
                      max_value=max_layers)
    for j in range(n_tasks):
        architecture = []
        for i in range(n_layers):
            n_units = hp.Int(name="n_units_layer_{0}_task_{1}".format(i, j),
                             min_value=min_units_per_layer,
                             max_value=max_units_per_layer)
            architecture.append(n_units)
        architecture.append(1)  # single-unit output layer per task
        task_towers.append(MLP(architecture,
                               hp["hidden_layer_activation"],
                               hp["output_layer_activation"]))
    return task_towers
def _build_keras_model(hparams: kerastuner.HyperParameters,
                       tf_transform_output: tft.TFTransformOutput) -> tf.keras.Model:
    """Creates a Keras WideDeep Classifier model.

    Args:
        hparams: Holds HyperParameters for tuning.
        tf_transform_output: A TFTransformOutput.

    Returns:
        A Keras Model.
    """
    # Defines deep feature columns and input layers.
    deep_columns = [
        tf.feature_column.numeric_column(
            key=features.transformed_name(key), shape=())
        for key in features.NUMERIC_FEATURE_KEYS
    ]
    input_layers = {
        column.key: tf.keras.layers.Input(name=column.key, shape=(),
                                          dtype=tf.float32)
        for column in deep_columns
    }

    # Defines wide feature columns and input layers.
    categorical_columns = [
        tf.feature_column.categorical_column_with_identity(
            key=features.transformed_name(key),
            num_buckets=tf_transform_output.num_buckets_for_transformed_feature(
                features.transformed_name(key)),
            default_value=0)
        for key in features.CATEGORICAL_FEATURE_KEYS
    ]
    wide_columns = [
        tf.feature_column.indicator_column(categorical_column)
        for categorical_column in categorical_columns
    ]
    input_layers.update({
        column.categorical_column.key: tf.keras.layers.Input(
            name=column.categorical_column.key, shape=(), dtype=tf.int32)
        for column in wide_columns
    })

    # Build Keras model using hparams.
    deep = tf.keras.layers.DenseFeatures(deep_columns)(input_layers)
    for n in range(int(hparams.get('n_layers'))):
        deep = tf.keras.layers.Dense(
            units=hparams.get('n_units_' + str(n + 1)))(deep)
    wide = tf.keras.layers.DenseFeatures(wide_columns)(input_layers)

    output = tf.keras.layers.Dense(features.NUM_CLASSES, activation='softmax')(
        tf.keras.layers.concatenate([deep, wide]))
    model = tf.keras.Model(input_layers, output)
    model.compile(
        loss='sparse_categorical_crossentropy',
        # `learning_rate` replaces the deprecated `lr` argument.
        optimizer=tf.keras.optimizers.Adam(
            learning_rate=hparams.get('learning_rate')),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
    model.summary(print_fn=absl.logging.info)
    return model
def build_hyper_cross_stitched_model(hp: HyperParameters,
                                     n_tasks: int,
                                     all_columns: List[str],
                                     cat_features_dim: Dict[str, int],
                                     restricted_hyperparameter_search: bool):
    """Build model for Cross-Stitch networks.

    Parameters
    ----------
    hp: instance of HyperParameters
        Hyperparameters that define architecture and training of neural networks

    n_tasks: int
        Number of tasks

    all_columns: list
        Names of the features

    cat_features_dim: dict
        Dictionary that maps from the name of a categorical feature to its
        dimensionality.

    restricted_hyperparameter_search: bool
        If True, fixes the following hyperparameters and does not optimize them:
        - batch_size = 1024
        - hidden_layer_activation = relu
        - optimizer = sgd

    Returns
    -------
    model: tensorflow.keras.models.Model
        Compiled Cross-Stitch Networks model
    """
    # define activation functions and preprocessing layer
    build_activation_functions(hp, restricted_hyperparameter_search)
    preprocessing_layer = build_preprocessing_layer_uci_income(
        hp, all_columns, cat_features_dim)

    # propagate input through preprocessing layer
    input_layer = Input(shape=(len(all_columns),))
    x = preprocessing_layer(input_layer)

    # build cross-stitch network model
    n_layers = hp.Int("number_of_hidden_layers", min_value=2, max_value=8)
    for i in range(n_layers):
        n_units = hp.Int("n_units_layer_{0}".format(i),
                         min_value=8,
                         max_value=40)
        dense_layers_output = [
            Dense(n_units, hp["hidden_layer_activation"])(x)
            for _ in range(n_tasks)
        ]
        x = CrossStitchBlock()(dense_layers_output)
    output_layers = [
        Dense(1, hp['output_layer_activation'])(x)
        for _ in range(n_tasks)
    ]
    model = Model(inputs=input_layer, outputs=output_layers)
    return model
def build_experts(hp: HyperParameters):
    """Helper method to build expert networks for OMOE and MMOE."""
    architecture = []
    n_experts = hp.Int("n_experts", 4, 10, default=6)
    n_layers = hp.Int("n_layers_experts", 2, 4, default=2)
    for i in range(n_layers):
        n_units = hp.Int("n_units_experts_{0}".format(i), 10, 20)
        architecture.append(n_units)
    return [MLP(architecture, hp["hidden_layer_activation"])
            for _ in range(n_experts)]
def build_activation_functions(hp: HyperParameters,
                               restricted_hyperparameter_search: bool):
    """Helper method for setting activation functions."""
    if restricted_hyperparameter_search:
        hp.Fixed("hidden_layer_activation", "relu")
    else:
        hp.Choice("hidden_layer_activation", ["relu", "elu", "selu"])
    hp.Fixed("output_layer_activation", "sigmoid")
    return hp
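# --- Illustrative check (not in the original source) ---
# Both hp.Fixed and hp.Choice register a value that later code reads back via
# hp.get()/hp[...] lookups; on a fresh HyperParameters object the restricted
# branch pins the hidden activation to 'relu'.
from kerastuner import HyperParameters

hp = HyperParameters()
build_activation_functions(hp, restricted_hyperparameter_search=True)
assert hp.get("hidden_layer_activation") == "relu"
assert hp.get("output_layer_activation") == "sigmoid"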
def build_mtl_shared_bottom(hp: HyperParameters,
                            n_tasks: int,
                            all_columns: List[str],
                            cat_features_dim: Dict[str, int],
                            restricted_hyperparameter_search: bool):
    """Build standard multi-task learning model with hard parameter sharing.

    Parameters
    ----------
    hp: instance of HyperParameters
        Hyperparameters that define architecture and training of neural networks

    n_tasks: int
        Number of tasks

    all_columns: list
        Names of the features

    cat_features_dim: dict
        Dictionary that maps from the name of a categorical feature to its
        dimensionality.

    restricted_hyperparameter_search: bool
        If True, fixes the following hyperparameters and does not optimize them:
        - batch_size = 1024
        - hidden_layer_activation = relu
        - optimizer = sgd

    Returns
    -------
    model: tensorflow.keras.models.Model
        Compiled standard MTL model with hard parameter sharing
    """
    # define activation functions and preprocessing layer
    build_activation_functions(hp, restricted_hyperparameter_search)
    preprocessing_layer = build_preprocessing_layer_uci_income(
        hp, all_columns, cat_features_dim)

    # propagate input through preprocessing layer
    input_layer = Input(shape=(len(all_columns),))
    x = preprocessing_layer(input_layer)

    # build shared layers
    architecture = []
    n_layers = hp.Int("n_layers_experts", 2, 4, default=2)
    for i in range(n_layers):
        n_units = hp.Int("n_units_experts_{0}".format(i), 10, 20)
        architecture.append(n_units)
    shared_layers = MLP(architecture, hp["hidden_layer_activation"])
    # the shared bottom consumes the preprocessed features
    shared_layers_output = shared_layers(x)

    # task layers
    task_towers = build_task_towers(hp, n_tasks)
    output_layer = [task(shared_layers_output) for task in task_towers]
    model = Model(inputs=input_layer, outputs=output_layer)
    return model
def _build_model(
    hp: HyperParameters,
    input_layer: KerasTensor,
    encoded_layer: KerasTensor,
) -> keras.Model:
    """Build the part of the architecture tunable by keras-tuner.

    Note:
        It is a relatively simple dense network, with self-normalizing layers.

    Args:
        hp: hyperparameters passed by the tuner.
        input_layer: The input layer of the model.
        encoded_layer: The encoding layer of the model.

    Returns:
        A tunable keras functional model.
    """
    x = encoded_layer
    for i in range(hp.Int("dense_layers", 1, 3, default=2)):
        # Chain on `x` so successive Dense blocks stack, rather than each
        # branching off encoded_layer.
        x = layers.Dense(
            units=hp.Int(f"units_layer_{i + 1}",
                         min_value=32,
                         max_value=256,
                         step=32,
                         default=64),
            activation="selu",
            kernel_initializer=tf.keras.initializers.LecunNormal(),
        )(x)
        x = layers.AlphaDropout(0.5)(x)
    output_layer = layers.Dense(1, activation="sigmoid")(x)
    model = keras.Model(input_layer, output_layer)
    model.compile(
        optimizer=keras.optimizers.Adam(
            hp.Choice("learning_rate", values=[1e-2, 1e-3, 1e-4], default=1e-3)),
        loss="binary_crossentropy",
        metrics=[
            "accuracy",
            tfa.metrics.F1Score(num_classes=2,
                                average="micro",
                                threshold=0.5,
                                name="f1_score"),
        ],
    )
    return model
def build_model(hp: HyperParameters):
    inputs = tf.keras.Input((15,))
    x = inputs
    dropout = hp.Float('dropout', 0.0, 0.5, 0.1, default=0.2)
    for i in range(1):
        x = tf.keras.layers.Dense(
            2 ** hp.Int('exponent_{}'.format(i), 5, 8, default=6), 'relu')(x)
        x = tf.keras.layers.Dropout(dropout)(x)
    x = tf.keras.layers.Dense(18, activation='softmax', dtype='float32')(x)
    model = tf.keras.Model(inputs=inputs, outputs=x)
    model.compile('adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model
def fit_hier_embedding(X, y, result_dir, project):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)
    y_train = to_categorical(y_train, output_dim)
    y_test = to_categorical(y_test, output_dim)

    X_train1 = X_train[['Rating', 'CocoaPercent']].values
    X_train2 = X_train.drop(['Rating', 'CocoaPercent'], axis=1).values
    X_test1 = X_test[['Rating', 'CocoaPercent']].values
    X_test2 = X_test.drop(['Rating', 'CocoaPercent'], axis=1).values

    dim1 = X_train1.shape[1]
    dim2 = X_train2.shape[1]
    hp = HyperParameters()
    # Note: the lambda ignores the HyperParameters instance the tuner passes in
    # and closes over the outer `hp` instead.
    bm = lambda x: tune_optimizer_model(hp, dim1, dim2)
    print(dim1, dim2)
    tuner = RandomSearch(bm,
                         objective='val_accuracy',
                         max_trials=MAX_TRIALS,
                         executions_per_trial=EXECUTIONS_PER_TRIAL,
                         directory=result_dir,
                         project_name=project,
                         seed=32)
    TRAIN_EPOCHS = 1000
    tuner.search(x=[X_train1, X_train2],
                 y=y_train,
                 epochs=TRAIN_EPOCHS,
                 validation_data=([X_test1, X_test2], y_test))
    tuner.results_summary()
def fit(self,
        X: pd.DataFrame,
        y: np.ndarray,
        batch_size: int = 32,
        epochs: int = 20) -> None:
    """Fit on training data.

    Notes:
        The input of the model is determined by the features metadata.
        If a model has already been found (by hyperparameter tuning for
        example), the fitting is done on this model, else on the model with
        the default hyperparameters.

    Args:
        X: Input features.
        y: Ground truth labels as a numpy array of 0-s and 1-s.

    Returns:
        None.
    """
    dataset = self._preprocessor.preprocess_fit(X, y, batch_size=batch_size)
    dataset_spec = self._preprocessor.get_dataset_spec(dataset)
    if self._model is None:
        self._model = self._model_factory(dataset_spec).build(
            hp=HyperParameters(), dataset=dataset)
    class_weight = self._preprocessor.get_class_weight(dataset)
    self._model.fit(dataset, epochs=epochs, class_weight=class_weight)
def _build_keras_model(hparams: kerastuner.HyperParameters) -> tf.keras.Model:
    features_in = []
    features_in.extend(DENSE_FEATURES)
    features_in.extend(BINARY_FEATURES)
    features_in = [f'{x}_xf' for x in features_in]

    input_layers = {
        colname: tf.keras.layers.Input(name=colname, shape=(1,),
                                       dtype=tf.float32)
        for colname in features_in
    }
    x = tf.keras.layers.Concatenate(axis=-1)(list(input_layers.values()))
    h = int(hparams.get(H_SIZE))
    x = tf.keras.layers.Dense(units=h, activation='relu')(x)
    out = tf.keras.layers.Dense(units=1, activation='sigmoid')(x)
    model = tf.keras.Model(input_layers, out)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=[tf.keras.metrics.BinaryAccuracy()])
    model.summary(print_fn=logging.info)
    return model
def _build_keras_model(data_provider: KerasDataProvider,
                       hparams: kerastuner.HyperParameters) -> tf.keras.Model:
    """Returns a Keras Model for the given data adapter.

    Args:
        data_provider: Data adapter used to get the task information.
        hparams: Hyperparameters of the model.

    Returns:
        A keras model for the given adapter and hyperparams.
    """
    feature_columns = (data_provider.get_numeric_feature_columns() +
                       data_provider.get_embedding_feature_columns())
    input_layers = data_provider.get_input_layers()

    # All input_layers must be consumed for the Keras Model to work.
    assert len(feature_columns) >= len(input_layers)

    x = tf.keras.layers.DenseFeatures(feature_columns)(input_layers)
    hparam_nodes = hparams.get('num_nodes')
    for numnodes in [hparam_nodes] * hparams.get('num_layers'):
        x = tf.keras.layers.Dense(numnodes)(x)
    output = tf.keras.layers.Dense(data_provider.head_size,
                                   activation=data_provider.head_activation,
                                   name='output')(x)

    model = tf.keras.Model(input_layers, output)

    lr = float(hparams.get('learning_rate'))
    optimizer_str = hparams.get('optimizer')
    if optimizer_str == 'Adam':
        optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
    elif optimizer_str == 'Adagrad':
        optimizer = tf.keras.optimizers.Adagrad(learning_rate=lr)
    elif optimizer_str == 'RMSprop':
        optimizer = tf.keras.optimizers.RMSprop(learning_rate=lr)
    elif optimizer_str == 'SGD':
        optimizer = tf.keras.optimizers.SGD(learning_rate=lr)
    else:
        # Fail fast instead of hitting an UnboundLocalError in compile() below.
        raise ValueError('Unsupported optimizer: {}'.format(optimizer_str))

    model.compile(loss=data_provider.loss,
                  optimizer=optimizer,
                  metrics=data_provider.metrics)
    model.summary()
    return model
def _verify_output(self):
    # Test best hparams.
    best_hparams_path = os.path.join(self._best_hparams.uri,
                                     'best_hparams.txt')
    # tf.io.gfile replaces the removed TF1-era tf.gfile API.
    self.assertTrue(tf.io.gfile.exists(best_hparams_path))
    best_hparams_config = json.loads(
        file_io.read_file_to_string(best_hparams_path))
    best_hparams = HyperParameters.from_config(best_hparams_config)
    self.assertIn(best_hparams.get('learning_rate'), (1e-1, 1e-3))
    self.assertBetween(best_hparams.get('num_layers'), 2, 10)
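# --- Illustrative round trip (assumption; not from the original test) ---
# The assertions above rely on HyperParameters serializing to a JSON-friendly
# config via get_config() and restoring via from_config():
from kerastuner import HyperParameters

hp = HyperParameters()
hp.Choice('learning_rate', [1e-1, 1e-3])
hp.Int('num_layers', 2, 10)
restored = HyperParameters.from_config(hp.get_config())
assert restored.get('learning_rate') in (1e-1, 1e-3)
assert 2 <= restored.get('num_layers') <= 10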
def _build_keras_model(hparams: kerastuner.HyperParameters) -> tf.keras.Model:
    """Creates Keras model for testing.

    Args:
        hparams: Holds HyperParameters for tuning.

    Returns:
        A Keras Model.
    """
    model = keras.Sequential()
    model.add(keras.layers.Dense(64, activation='relu', input_shape=(32,)))
    for _ in range(hparams.get('num_layers')):  # pytype: disable=wrong-arg-types
        model.add(keras.layers.Dense(64, activation='relu'))
    model.add(keras.layers.Dense(10, activation='softmax'))
    model.compile(
        optimizer=keras.optimizers.Adam(hparams.get('learning_rate')),
        loss='categorical_crossentropy',
        # CategoricalAccuracy matches the one-hot targets implied by the loss;
        # plain Accuracy would compare raw probabilities to labels element-wise.
        metrics=[tf.keras.metrics.CategoricalAccuracy(name='accuracy')])
    return model
def build_optimizer(hp: HyperParameters):
    """Helper method that defines hyperparameter optimization for optimizer."""
    optimizer = hp.Choice(name="optimizer",
                          values=["adam", "sgd", "rms"],
                          default="adam")
    learning_rate = hp.Float(name="learning_rate",
                             min_value=1e-4,
                             max_value=5e-3,
                             sampling="log",
                             default=1e-3)
    # probably could use enums here
    if optimizer == "adam":
        return Adam(learning_rate=learning_rate)
    elif optimizer == "sgd":
        return SGD(learning_rate=learning_rate)
    elif optimizer == "rms":
        return RMSprop(learning_rate=learning_rate)
    else:
        raise NotImplementedError()
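# --- Illustrative usage (assumed; not in the original file) ---
# On a fresh HyperParameters object each hp.* call registers and returns its
# default, so the first build yields Adam at learning_rate=1e-3.
from kerastuner import HyperParameters

hp = HyperParameters()
opt = build_optimizer(hp)
print(type(opt).__name__, hp.values)  # -> Adam {'optimizer': 'adam', 'learning_rate': 0.001}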
def _build_keras_model(hparams: kerastuner.HyperParameters) -> tf.keras.Model:
    """Creates a DNN Keras model for classifying iris data.

    Args:
        hparams: Holds HyperParameters for tuning.

    Returns:
        A Keras Model.
    """
    absl.logging.info('HyperParameters config: %s' % hparams.get_config())
    inputs = [keras.layers.Input(shape=(1,), name=f) for f in _FEATURE_KEYS]
    d = keras.layers.concatenate(inputs)
    for _ in range(hparams.get('num_layers')):  # pytype: disable=wrong-arg-types
        d = keras.layers.Dense(8, activation='relu')(d)
    output = keras.layers.Dense(3, activation='softmax')(d)
    model = keras.Model(inputs=inputs, outputs=output)
    model.compile(
        optimizer=keras.optimizers.Adam(hparams.get('learning_rate')),
        loss='sparse_categorical_crossentropy',
        # SparseCategoricalAccuracy matches the integer labels implied by the
        # sparse loss; CategoricalAccuracy expects one-hot targets.
        metrics=[keras.metrics.SparseCategoricalAccuracy(name='accuracy')])
    # model.summary() prints and returns None, so pass a print_fn to log it.
    model.summary(print_fn=absl.logging.info)
    return model
def build_model(hp: HyperParameters):
    inputs = tf.keras.Input((15,))
    x = inputs
    y = inputs
    t_dropout = hp.Float('target_dropout', 0.0, 0.5, 0.1, default=0.2)
    p_dropout = hp.Float('pretrain_dropout', 0.0, 0.5, 0.1, default=0.2)
    for i in range(1):
        # hidden layer
        x = tf.keras.layers.Dense(
            2 ** hp.Int('target_exponent_{}'.format(i), 5, 8, default=6),
            activation='relu',
            kernel_initializer='he_uniform',
            name='target_dense_{}'.format(i))(x)
        y = tf.keras.layers.Dense(
            2 ** hp.Int('pretrain_exponent_{}'.format(i), 5, 8, default=6),
            activation='relu',
            kernel_initializer='he_uniform',
            name='pretrain_dense_{}'.format(i))(y)
        a = tf.keras.layers.Dense(
            2 ** hp.Int('adapter_exponent_{}'.format(i), 2, 6, default=4),
            activation='relu',
            kernel_initializer='he_uniform',
            name='target_adapter_{}'.format(i))(y)
        # dropout layer
        x = tf.keras.layers.Dropout(t_dropout,
                                    name='target_dropout_{}'.format(i))(x)
        x = tf.keras.layers.concatenate([x, a],
                                        name='target_concat_{}'.format(i))
        y = tf.keras.layers.Dropout(p_dropout,
                                    name='pretrain_dropout_{}'.format(i))(y)
    x = tf.keras.layers.Dense(18, activation='softmax', dtype='float32',
                              name='target_output')(x)
    y = tf.keras.layers.Dense(18, activation='softmax', dtype='float32',
                              name='pretrain_output')(y)
    model = tf.keras.Model(inputs=inputs, outputs=[x, y])
    return model
def _build_keras_model(hparams: kerastuner.HyperParameters) -> tf.keras.Model:
    """Creates a DNN Keras model for classifying iris data.

    Args:
        hparams: Holds HyperParameters for tuning.

    Returns:
        A Keras Model.
    """
    model = keras.Sequential()
    model.add(
        keras.layers.Dense(8, activation='relu',
                           input_shape=(len(_FEATURE_KEYS),)))
    for _ in range(hparams.get('num_layers')):  # pytype: disable=wrong-arg-types
        model.add(keras.layers.Dense(8, activation='relu'))
    model.add(keras.layers.Dense(3, activation='softmax'))
    model.compile(
        optimizer=keras.optimizers.Adam(hparams.get('learning_rate')),
        loss='categorical_crossentropy',
        # CategoricalAccuracy matches the 3-class softmax head; BinaryAccuracy
        # would threshold each probability independently.
        metrics=[tf.keras.metrics.CategoricalAccuracy(name='accuracy')])
    # model.summary() prints and returns None, so pass a print_fn to log it.
    model.summary(print_fn=absl.logging.info)
    return model
def build_hyper_mmoe(hp: HyperParameters,
                     n_tasks: int,
                     all_columns: List[str],
                     cat_features_dim: Dict[str, int],
                     val_data: Tuple,
                     output_layer_activation: str):
    """Build Multi-Gate Mixture of Experts.

    Parameters
    ----------
    hp: instance of HyperParameters
        Hyperparameters that define architecture and training of neural networks

    Returns
    -------
    model: tensorflow.keras.models.Model
        Compiled Multi-Gate Mixture of Experts model
    """
    hidden_layer_activation = hp.Choice("hidden_layer_activation",
                                        ["elu", "relu", "selu"])
    output_layer_activation = hp.Fixed("output_layer_activation",
                                       output_layer_activation)
    experts = build_experts(hp)
    task_towers = build_task_towers(hp, n_tasks)
    preprocessing_layer = build_preprocessing_layer_uci_income(
        hp, all_columns, cat_features_dim)
    mmoe = MultiGateMixtureOfExperts(experts,
                                     task_towers,
                                     base_layer=preprocessing_layer)
    input_layer = Input(shape=(len(all_columns),))
    output_layer = mmoe(input_layer)
    model = Model(inputs=input_layer, outputs=output_layer)
    # `validation_data` is a fit() argument, not a compile() argument,
    # so it is not passed here.
    model.compile(loss=['binary_crossentropy', 'binary_crossentropy'],
                  optimizer=build_optimizer(hp),
                  metrics=[tf.keras.metrics.AUC()])
    return model
def build_preprocessing_layer_uci_income(hp: HyperParameters,
                                         all_columns: List[str],
                                         cat_features_dim: Dict[str, int]):
    """Helper method that builds the preprocessing layer for the UCI Census
    Income dataset."""
    feature_sparsity_threshold = hp.Int("feature_sparsity",
                                        min_value=3,
                                        max_value=10,
                                        default=3)
    return PreprocessingLayer(
        all_columns,
        cat_features_dim,
        feature_sparsity_threshold=feature_sparsity_threshold)
def init_hyperparameters():
    """Initializes the fixed hyperparameters used by the model.

    :return: a HyperParameters object pre-populated with fixed values.
    """
    hp = HyperParameters()
    hp.Fixed("duplicate convolutional layers", 8)
    hp.Fixed("End Layers", 20)
    hp.Fixed("Vertical Convolution", 3)
    hp.Fixed("Horizontal Convolution", 3)
    hp.Fixed("MSE Lambda", 70)
    hp.Fixed("positive case Lambda", 70)
    return hp
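# --- Hedged sketch (assumed; not in the original file) ---
# One common way such pre-populated fixed values are consumed: hand them to a
# tuner and disable new entries, so the search keeps every value above. The
# builder name `build_model` is a placeholder here.
from kerastuner.tuners import RandomSearch

tuner = RandomSearch(build_model,
                     hyperparameters=init_hyperparameters(),
                     tune_new_entries=False,  # keep all values fixed
                     objective='val_loss',
                     max_trials=1)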
def construct_model(self,
                    tuned_params: Dict[str, Union[int, float]],
                    hps: HyperParameters = None) -> Model:
    hpf = HyperParameterFactory(self.default_parameters_values, tuned_params, hps)
    filter_0 = hpf.get_choice(FILTER0_NAME, [4, 8, 16, 32])
    filter_1 = hpf.get_choice(FILTER1_NAME, [32, 48, 64])
    filter_2 = hpf.get_choice(FILTER2_NAME, [64, 96, 128])
    max_pool_0 = hpf.get_choice(MAX_POOL_SIZE0, [1, 2])
    max_pool_1 = hpf.get_choice(MAX_POOL_SIZE1, [1, 2])
    max_pool_2 = hpf.get_choice(MAX_POOL_SIZE2, [1, 2, 4, 8])
    dense = hpf.get_int(
        DENSE_NAME,
        lambda default: hps.Int(DENSE_NAME, 32, 128, step=8, default=default))
    lr = hpf.get_choice(LEARNING_RATE_NAME, [1e-2, 1e-3, 1e-4])

    model = Sequential([
        Input(name='Input', shape=(12, 12, 7)),
        Conv2D(filter_0, 2, strides=1, activation=tf.nn.relu, name='Conv2D_0'),
        MaxPooling2D(max_pool_0, name='MaxPool_0'),
        Conv2D(filter_1, 3, strides=1, activation=tf.nn.relu, name='Conv2D_1'),
        MaxPooling2D(max_pool_1, name='MaxPool_1'),
        # Conv2D(self.get_param_value(FILTER2_NAME, tuned_params), 2, strides=1,
        #        activation=tf.nn.relu, name='Conv2D_2'),
        # MaxPooling2D(self.get_param_value(MAX_POOL_SIZE2, tuned_params),
        #              name='MaxPool_2'),
        Flatten(name='Flatten'),
        Dropout(0.1, name='Dropout'),
        Dense(dense, activation=tf.nn.relu, name='dense'),
        Dense(5, activation=tf.nn.softmax, name='Output'),
    ])

    loss_fn = tf.keras.losses.CategoricalCrossentropy()
    opt = tf.keras.optimizers.Adam(learning_rate=lr)
    model.compile(optimizer=opt,
                  loss=loss_fn,
                  metrics=[tf.keras.metrics.categorical_accuracy])
    return model
def build_model(hp: kt.HyperParameters, use_avs_model: bool = False) -> Model:
    batch_size = config.generation.batch_size if stateful else None
    layer_names = name_generator('layer')

    inputs = {}
    per_stream = {}
    for col in seq.x_cols:
        shape = None, *seq.shapes[col][2:]
        inputs[col] = layers.Input(batch_size=batch_size, shape=shape, name=col)
        per_stream[f'{col}'] = inputs[col]
    per_stream_list = list(per_stream.values())
    x = forgiving_concatenate(inputs=per_stream_list,
                              axis=-1,
                              name=next(layer_names))

    for i in range(hp.Int('TEST', 2, 8)):
        x = layers.LSTM(64, return_sequences=True)(x)

    outputs = {}
    loss = {}
    for col in seq.y_cols:
        if col in seq.categorical_cols:
            shape = seq.shapes[col][-1]
            outputs[col] = layers.TimeDistributed(
                layers.Dense(shape, activation='softmax'), name=col)(x)
            loss[col] = keras.losses.CategoricalCrossentropy()
        if col in seq.regression_cols:
            shape = seq.shapes[col][-1]
            outputs[col] = layers.TimeDistributed(
                layers.Dense(shape, activation=None), name=col)(x)
            loss[col] = 'mse'

    if config.training.AVS_proxy_ratio == 0:
        logging.log(logging.WARNING,
                    f'Not using AVSModel with superior optimizer due to '
                    f'{config.training.AVS_proxy_ratio=}.')
    model = Model(inputs=inputs, outputs=outputs)
    opt = keras.optimizers.Adam()

    model.compile(
        optimizer=opt,
        loss=loss,
        metrics=['acc'],
    )
    return model
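# --- Assumed helper (not shown in the source) ---
# The builders here draw fresh layer names from `name_generator`; a minimal
# sketch consistent with how it is called (an endless iterator of unique names):
import itertools


def name_generator(prefix: str):
    for i in itertools.count():
        yield f'{prefix}_{i:04d}'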
def fit_hier_embeddings(X_train, X_test, y_train, y_test, out_dim,
                        chemical_embedding_files, taxonomy_embedding_files,
                        results_file='results.csv', hp_file='hp.json',
                        num_runs=1, params=None):
    params = params or PARAMS
    hp = HyperParameters()
    bm = lambda x: build_model(x, len(X_train[0][0]), len(X_train[1][0]), out_dim)
    tune(X_train, X_test, y_train, y_test, bm, hp, params, num_runs,
         results_file, hp_file)
def fit_onehot(X_train, X_test, y_train, y_test, output_dim,
               results_file='results.csv', hp_file='hp.json',
               num_runs=1, params=None):
    # one hot
    params = params or PARAMS
    hp = HyperParameters()
    dim1 = X_train[0].shape[1]
    dim2 = X_train[1].shape[1]
    out_dim = output_dim
    # Note: unlike fit_hier_embeddings, the lambda ignores its argument and
    # closes over the outer `hp` instead.
    bm = lambda x: build_model(hp, dim1, dim2, out_dim)
    tune(X_train, X_test, y_train, y_test, bm, hp, params, num_runs,
         results_file, hp_file)
def build(self, hp: HyperParameters):
    model = keras.models.Sequential([
        keras.layers.Reshape((28, 28, 1, 1)),
        # Introduce streams; imaginary part initialized to 0
        keras.layers.Lambda(lambda v: tf.stack((v, tf.zeros_like(v)), axis=-1)),
        keras.layers.Lambda(print_return),

        # Block 1: Shape [batch, 28, 28, channels=8, streams=2, 2]
        Conv2DH(out_orders=2, out_channels=8),
        HNonLinearity(),  # Defaults to ReLU
        Conv2DH(out_orders=2, out_channels=8),
        HBatchNormalization(),

        # Block 2: Shape [batch, 14, 14, channels=16, streams=2, 2]
        AvgPool2DH(strides=(2, 2)),
        Conv2DH(out_orders=2, out_channels=16),
        HNonLinearity(),
        Conv2DH(out_orders=2, out_channels=16),
        HBatchNormalization(),

        # Block 3: Shape [batch, 7, 7, channels=35, streams=2, 2]
        AvgPool2DH(),
        Conv2DH(out_orders=2, out_channels=35),
        HNonLinearity(),
        Conv2DH(out_orders=2, out_channels=35),

        # Block 4: Reduce to magnitudes and apply final activation
        HFlatten(),
        keras.layers.Lambda(print_return),
        keras.layers.Dense(10),
        keras.layers.Softmax(),
    ])
    model.compile(
        optimizer=keras.optimizers.Adam(
            learning_rate=10 ** hp.Float('log_learning_rate', -6, -1,
                                         step=0.5, default=-3)),
        loss='categorical_crossentropy',
        metrics=['accuracy'])
    return model
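# --- Assumed wiring (not shown in the source) ---
# `build(self, hp)` follows the keras-tuner HyperModel signature, so an
# instance of the enclosing class (named `HarmonicHyperModel` here purely for
# illustration) can be handed straight to a tuner:
from kerastuner.tuners import RandomSearch

tuner = RandomSearch(HarmonicHyperModel(),
                     objective='val_accuracy',
                     max_trials=10)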
    # get_best_hyperparameters returns a list, so take the first entry.
    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

    # Build the model with the optimal hyperparameters and train it on the
    # data for 50 epochs
    model = tuner.hypermodel.build(best_hps)
    history = model.fit(
        train,
        epochs=50,
        validation_data=test,
        callbacks=SimpNet.get_callbacks(),
        verbose=2,
    )
elif mode == 'test':
    hp = HyperParameters()
    # best 1: 0.34% misclassification
    # hp.Fixed('weight_init', value='GlorotUniform')
    # hp.Fixed('base_lr', value=0.21145)
    # hp.Fixed('decay_steps', value=7185)
    # hp.Fixed('decay_rate', value=0.115)
    # hp.Fixed('lr_momentum', value=0.91074)

    # best 2
    # hp.Fixed('weight_init', value='HeUniform')
    # hp.Fixed('base_lr', value=0.27872)
    # hp.Fixed('decay_steps', value=3805)
    # hp.Fixed('decay_rate', value=0.44912)
    # hp.Fixed('lr_momentum', value=0.93493)

    # manual tune: 0.30% / 0.32% misclassification
def build_model(hp: kt.HyperParameters, use_avs_model: bool = False):
    batch_size = config.generation.batch_size if stateful else None
    layer_names = name_generator('layer')

    inputs = {}
    last_layer = []
    for col in seq.x_cols:
        shape = None, *seq.shapes[col][2:]
        inputs[col] = layers.Input(batch_size=batch_size, shape=shape, name=col)
        last_layer.append(inputs[col])

    random.seed(43)
    for i in range(hp.Int('lstm_layers', 2, 7)):
        outs = []
        depth = hp.Int(f'depth_{i}', 4, 64, sampling='log')
        connections = min(hp.Int(f'connections_{i}', 1, 3), len(last_layer))
        dropout = hp.Float(f'dropout_{i}', 0, 0.5)
        for width_i in range(hp.Int(f'width_{i}', 1, 16)):
            t = layers.LSTM(depth,
                            return_sequences=True,
                            name=f'lstm{i:03}_{width_i:03}_{next(layer_names)}',
                            stateful=stateful,
                            )(forgiving_concatenate(
                                random.sample(last_layer, connections),
                                name=next(layer_names)))
            t = layers.BatchNormalization(name=next(layer_names))(t)
            t = layers.Dropout(dropout, name=next(layer_names))(t)
            outs.append(t)
        last_layer = outs
    x = forgiving_concatenate(last_layer)

    outputs = {}
    loss = {}
    for col in seq.y_cols:
        if col in seq.categorical_cols:
            shape = seq.shapes[col][-1]
            outputs[col] = layers.TimeDistributed(
                layers.Dense(shape, activation='softmax'), name=col)(x)
            loss[col] = keras.losses.CategoricalCrossentropy(
                label_smoothing=tf.cast(hp.Float('label_smoothing', 0.0, 0.7),
                                        'float32'),
            )  # does not work well with mixed precision and stateful model
        if col in seq.regression_cols:
            shape = seq.shapes[col][-1]
            outputs[col] = layers.TimeDistributed(
                layers.Dense(shape, activation=None), name=col)(x)
            loss[col] = 'mse'

    if stateful or config.training.AVS_proxy_ratio == 0:
        if config.training.AVS_proxy_ratio == 0:
            logging.log(logging.WARNING,
                        f'Not using AVSModel with superior optimizer due to '
                        f'{config.training.AVS_proxy_ratio=}.')
        model = Model(inputs=inputs, outputs=outputs)
        opt = keras.optimizers.Adam()
    else:
        if use_avs_model:
            model = AVSModel(inputs=inputs, outputs=outputs, config=config)
        else:
            model = Model(inputs=inputs, outputs=outputs)
        lr_schedule = FlatCosAnnealSchedule(
            decay_start=len(seq) * 30,  # Give extra epochs to big batch_size
            initial_learning_rate=hp.Choice('initial_learning_rate',
                                            [3e-2, 1e-2, 8e-3]),
            decay_steps=len(seq) * 40,
            alpha=0.01,
        )
        # Ranger hyperparameters based on
        # https://github.com/fastai/imagenette/blob/master/2020-01-train.md
        opt = tfa.optimizers.RectifiedAdam(learning_rate=lr_schedule,
                                           beta_1=0.95, beta_2=0.99,
                                           epsilon=1e-6)
        opt = tfa.optimizers.Lookahead(opt, sync_period=6, slow_step_size=0.5)

    model.compile(
        optimizer=opt,
        loss=loss,
        metrics=metrics.create_metrics((not stateful), config),
    )
    return model
def build_model(hp: kt.HyperParameters, use_avs_model: bool = True):
    batch_size = config.generation.batch_size if stateful else None
    layer_names = name_generator('layer')

    inputs = {}
    per_stream = {}
    cnn_activation = {'relu': keras.activations.relu,
                      'elu': keras.activations.elu,
                      'mish': tfa.activations.mish}[
                          hp.Choice('cnn_activation', ['relu', 'mish'])]
    cat_cnn_repetition = hp.Int('cat_cnn_repetition', 0, 4)
    cnn_spatial_dropout = hp.Float('spatial_dropout', 0.0, 0.5)
    cat_cnn_filters = hp.Int('cat_cnn_filters', 64, 256, sampling='log')
    reg_cnn_repetition = hp.Int('reg_cnn_repetition', 0, 4)
    reg_cnn_filters = hp.Int('reg_cnn_filters', 64, 256, sampling='log')
    # Each character of the chosen string is one kernel size, so '35' means
    # parallel Conv1D branches with kernel sizes 3 and 5.
    cnn_kernel_size = hp.Choice('cnn_kernel_size', ['1', '3', '35', '37'])

    for col in seq.x_cols:
        if col in seq.categorical_cols:
            shape = None, *seq.shapes[col][2:]
            inputs[col] = layers.Input(batch_size=batch_size, shape=shape,
                                       name=col)
            per_stream[col] = inputs[col]
            for _ in range(cat_cnn_repetition):
                per_stream[col] = forgiving_concatenate(
                    inputs=[layers.Conv1D(filters=cat_cnn_filters,
                                          kernel_size=int(s),
                                          activation=cnn_activation,
                                          padding='causal',
                                          kernel_initializer='lecun_normal',
                                          name=next(layer_names))(per_stream[col])
                            for conv_i, s in enumerate(cnn_kernel_size)],
                    axis=-1,
                    name=next(layer_names))
                per_stream[col] = layers.BatchNormalization(
                    name=next(layer_names))(per_stream[col])
                per_stream[col] = layers.SpatialDropout1D(
                    cnn_spatial_dropout)(per_stream[col])
        if col in seq.regression_cols:
            shape = None, *seq.shapes[col][2:]
            inputs[col] = layers.Input(batch_size=batch_size, shape=shape,
                                       name=col)
            per_stream[col] = inputs[col]
            for _ in range(reg_cnn_repetition):
                per_stream[col] = forgiving_concatenate(
                    inputs=[layers.Conv1D(filters=reg_cnn_filters,
                                          kernel_size=int(s),
                                          activation=cnn_activation,
                                          padding='causal',
                                          kernel_initializer='lecun_normal',
                                          name=next(layer_names))(per_stream[col])
                            for conv_i, s in enumerate(cnn_kernel_size)],
                    axis=-1,
                    name=next(layer_names))
                per_stream[col] = layers.BatchNormalization(
                    name=next(layer_names))(per_stream[col])
                per_stream[col] = layers.SpatialDropout1D(
                    cnn_spatial_dropout)(per_stream[col])

    per_stream_list = list(per_stream.values())
    x = forgiving_concatenate(inputs=per_stream_list, axis=-1,
                              name=next(layer_names))

    lstm_repetition = hp.Int('lstm_repetition', 0, 4)
    lstm_dropout = hp.Float('lstm_dropout', 0.0, 0.6)
    lstm_l2_regularizer = hp.Choice('lstm_l2_regularizer',
                                    [1e-2, 1e-4, 1e-6, 0.0])
    for i in range(lstm_repetition):
        if i > 0:
            x = layers.Dropout(lstm_dropout)(x)
        x = layers.LSTM(hp.Int(f'lstm_{i}_units', 128, 384, sampling='log'),
                        return_sequences=True,
                        stateful=stateful,
                        name=next(layer_names),
                        kernel_regularizer=keras.regularizers.l2(
                            lstm_l2_regularizer),
                        )(x)
        x = layers.BatchNormalization(name=next(layer_names))(x)

    end_cnn_repetition = hp.Int('end_cnn_repetition', 0, 2)
    end_spatial_dropout = hp.Float('end_spatial_dropout', 0.0, 0.5)
    end_cnn_filters = hp.Int('end_cnn_filters', 128, 384, sampling='log')
    end_cnn_kernel_size = hp.Choice('end_cnn_kernel_size', ['1', '3'])
    for _ in range(end_cnn_repetition):
        x = layers.SpatialDropout1D(end_spatial_dropout)(x)
        x = forgiving_concatenate(
            inputs=[layers.Conv1D(filters=end_cnn_filters,
                                  kernel_size=int(s),
                                  activation=cnn_activation,
                                  padding='causal',
                                  kernel_initializer='lecun_normal',
                                  name=next(layer_names))(x)
                    for conv_i, s in enumerate(end_cnn_kernel_size)],
            axis=-1,
            name=next(layer_names))
        x = layers.BatchNormalization(name=next(layer_names))(x)
    x = layers.SpatialDropout1D(end_spatial_dropout)(x)

    outputs = {}
    loss = {}
    for col in seq.y_cols:
        if col in seq.categorical_cols:
            shape = seq.shapes[col][-1]
            outputs[col] = layers.TimeDistributed(
                layers.Dense(shape, activation='softmax'), name=col)(x)
            loss[col] = keras.losses.CategoricalCrossentropy(
                label_smoothing=tf.cast(hp.Float('label_smoothing', 0.0, 0.6),
                                        'float32'),
            )  # does not work well with mixed precision and stateful model
        if col in seq.regression_cols:
            shape = seq.shapes[col][-1]
            outputs[col] = layers.TimeDistributed(
                layers.Dense(shape, activation=None), name=col)(x)
            loss[col] = 'mse'

    if stateful or config.training.AVS_proxy_ratio == 0:
        if config.training.AVS_proxy_ratio == 0:
            logging.log(logging.WARNING,
                        f'Not using AVSModel with superior optimizer due to '
                        f'{config.training.AVS_proxy_ratio=}.')
        model = Model(inputs=inputs, outputs=outputs)
        opt = keras.optimizers.Adam()
    else:
        model = AVSModel(inputs=inputs, outputs=outputs, config=config)
        decay_start_epoch = hp.Int('decay_start_epoch', 15, 40)
        decay_end_epoch = (decay_start_epoch * 4) // 3
        lr_schedule = FlatCosAnnealSchedule(
            # Give extra epochs to big batch_size
            decay_start=len(seq) * decay_start_epoch,
            initial_learning_rate=hp.Choice('initial_learning_rate',
                                            [3e-2, 1e-2, 8e-3]),
            decay_steps=len(seq) * decay_end_epoch,
            alpha=0.001,
        )
        # Ranger hyperparameters based on
        # https://github.com/fastai/imagenette/blob/master/2020-01-train.md
        opt = tfa.optimizers.RectifiedAdam(learning_rate=lr_schedule,
                                           beta_1=0.95, beta_2=0.99,
                                           epsilon=1e-6)
        opt = tfa.optimizers.Lookahead(opt, sync_period=6, slow_step_size=0.5)

    model.compile(
        optimizer=opt,
        loss=loss,
        metrics=metrics.create_metrics((not stateful), config),
    )
    return model