def define_hp(hp_model=None):
    """Declare the tunable search space from a bounds object.

    Args:
        hp_model: Object exposing the ``*_min`` / ``*_max`` / ``*_step``
            bounds and the ``dropout`` / ``learning_rate`` choice lists read
            below. A fresh ``SmallImagesHP()`` is used when it is ``None``.

    Returns:
        The populated ``HyperParameters`` instance.
    """
    space = HyperParameters()
    bounds = SmallImagesHP() if hp_model is None else hp_model

    space.Int(name='init',
              min_value=bounds.init_min,
              max_value=bounds.init_max,
              step=32)

    # First convolutional group: a layer count plus one size entry per
    # possible layer (entries are declared up to the maximum count).
    space.Int(name='cnn_layers',
              min_value=bounds.cnn_layers_min,
              max_value=bounds.cnn_layers_max,
              step=1)
    for layer in range(1, bounds.cnn_layers_max + 1):
        space.Int(name=f'cnn_{layer}',
                  min_value=bounds.cnn_min,
                  max_value=bounds.cnn_max,
                  step=32)

    # Second convolutional group, declared the same way.
    space.Int(name='cnn2_layers',
              min_value=bounds.cnn2_layers_min,
              max_value=bounds.cnn2_layers_max,
              step=1)
    for layer in range(1, bounds.cnn2_layers_max + 1):
        space.Int(name=f'cnn2_{layer}',
                  min_value=bounds.cnn2_min,
                  max_value=bounds.cnn2_max,
                  step=32)

    space.Int(name='dense',
              min_value=bounds.dense_min,
              max_value=bounds.dense_max,
              step=bounds.dense_step)
    space.Int(name='dense2',
              min_value=bounds.dense2_min,
              max_value=bounds.dense2_max,
              step=bounds.dense2_step)

    space.Choice('dropout', bounds.dropout)
    space.Choice('learning_rate', bounds.learning_rate)
    return space
def build_activation_functions(hp: HyperParameters, restricted_hyperparameter_search: bool):
    """Register the activation-function hyperparameters on ``hp``.

    The hidden-layer activation is pinned to ``"relu"`` when the search is
    restricted and is otherwise a choice between ``"relu"``, ``"elu"`` and
    ``"selu"``. The output-layer activation is always fixed to ``"sigmoid"``.

    Returns the same ``hp`` object that was passed in.
    """
    if not restricted_hyperparameter_search:
        hp.Choice("hidden_layer_activation", ["relu", "elu", "selu"])
    else:
        hp.Fixed("hidden_layer_activation", "relu")

    hp.Fixed("output_layer_activation", "sigmoid")
    return hp
def _build_model(
    hp: HyperParameters,
    input_layer: KerasTensor,
    encoded_layer: KerasTensor,
) -> keras.Model:
    """Build the part of the architecture tunable by keras-tuner.

    Note:
        It is a relatively simple dense network, with self-normalizing layers.

    Args:
        hp: hyperparameters passed by the tuner.
        input_layer: The input layer of the model.
        encoded_layer: The encoding layer of the model.

    Returns:
        A tunable keras functional model.
    """
    x = encoded_layer
    for i in range(hp.Int("dense_layers", 1, 3, default=2)):
        # Feed the running tensor `x`, not `encoded_layer`: otherwise every
        # iteration restarts from the encoding and only the last Dense layer
        # is ever connected to the output.
        x = layers.Dense(
            units=hp.Int(f"units_layer_{i + 1}", min_value=32, max_value=256, step=32, default=64),
            activation="selu",
            kernel_initializer=tf.keras.initializers.LecunNormal(),
        )(x)
        # NOTE(review): the source layout was whitespace-collapsed, so it is
        # unclear whether this dropout was per layer or applied once after the
        # stack; per-layer is assumed here -- confirm.
        x = layers.AlphaDropout(0.5)(x)

    output_layer = layers.Dense(1, activation="sigmoid")(x)
    model = keras.Model(input_layer, output_layer)
    model.compile(
        optimizer=keras.optimizers.Adam(
            hp.Choice("learning_rate", values=[1e-2, 1e-3, 1e-4], default=1e-3)),
        loss="binary_crossentropy",
        metrics=[
            "accuracy",
            tfa.metrics.F1Score(num_classes=2, average="micro", threshold=0.5, name="f1_score"),
        ],
    )
    return model
def build_optimizer(hp: HyperParameters):
    """Helper that draws the optimizer type and learning rate from ``hp``.

    Registers an ``"optimizer"`` choice (``"adam"``, ``"sgd"``, ``"rms"``) and
    a log-sampled ``"learning_rate"`` float, then returns the matching
    optimizer instance built with that learning rate.

    Raises:
        NotImplementedError: if the sampled optimizer name is not one of the
            three handled values.
    """
    chosen = hp.Choice(name="optimizer", values=["adam", "sgd", "rms"], default="adam")
    rate = hp.Float(name="learning_rate",
                    min_value=1e-4,
                    max_value=5e-3,
                    sampling="log",
                    default=1e-3)

    # probably could use enums here
    factories = {"adam": Adam, "sgd": SGD, "rms": RMSprop}
    factory = factories.get(chosen)
    if factory is None:
        raise NotImplementedError()
    return factory(learning_rate=rate)
def build_hyper_mmoe(hp: HyperParameters,
                     n_tasks: int,
                     all_columns: List[str],
                     cat_features_dim: Dict[str, int],
                     val_data: Tuple,
                     output_layer_activation: str):
    """
    Build Multi-Gate Mixture of Experts

    Parameters
    ----------
    hp: instance of HyperParameters
        Hyper-Parameters that define architecture and training of neural networks
    n_tasks: int
        Forwarded to ``build_task_towers``.
    all_columns: List[str]
        Its length sets the width of the model input; it is also forwarded to
        the preprocessing-layer builder.
    cat_features_dim: Dict[str, int]
        Forwarded to the preprocessing-layer builder.
    val_data: Tuple
        Unused. Kept so existing callers keep working. Validation data is an
        argument of ``Model.fit``, not ``Model.compile``, so it must be passed
        when the returned model is fitted.
    output_layer_activation: str
        Registered on ``hp`` as the fixed ``"output_layer_activation"`` value.

    Returns
    -------
    The compiled Keras model.
    """
    # Register the activation hyperparameters on `hp`. The returned values are
    # not needed in this function.
    hp.Choice("hidden_layer_activation", ["elu", "relu", "selu"])
    hp.Fixed("output_layer_activation", output_layer_activation)

    experts = build_experts(hp)
    task_towers = build_task_towers(hp, n_tasks)
    preprocessing_layer = build_preprocessing_layer_uci_income(
        hp, all_columns, cat_features_dim)
    mmoe = MultiGateMixtureOfExperts(experts,
                                     task_towers,
                                     base_layer=preprocessing_layer)

    input_layer = Input(shape=(len(all_columns), ))
    output_layer = mmoe(input_layer)
    model = Model(inputs=input_layer, outputs=output_layer)

    # `validation_data` was previously passed here; compile() does not accept
    # it, so it has been removed.
    # NOTE(review): the loss list is hard-coded for two outputs; presumably it
    # should follow `n_tasks` -- confirm against the task-tower builder.
    model.compile(
        loss=['binary_crossentropy', 'binary_crossentropy'],
        optimizer=build_optimizer(hp),
        metrics=[tf.keras.metrics.AUC()],
    )
    return model
def build_model(hp: kt.HyperParameters, use_avs_model: bool = False):
    """Assemble and compile a randomly wired stack of LSTM branches.

    Reads the module-level ``seq``, ``config`` and ``stateful`` objects.
    One input is created per column in ``seq.x_cols``. Each tuned LSTM level
    then holds several parallel LSTM branches, each fed by a random sample of
    the previous level's tensors. The global ``random`` generator is seeded
    with 43 before the wiring is drawn. One time-distributed Dense output is
    added per column in ``seq.y_cols``.

    Args:
        hp: Hyperparameter container supplied by the tuner.
        use_avs_model: On the non-stateful path with a non-zero
            ``AVS_proxy_ratio``, build an ``AVSModel`` instead of ``Model``.

    Returns:
        The compiled model.
    """
    batch_size = config.generation.batch_size if stateful else None
    names = name_generator('layer')

    inputs = {}
    previous = []
    for col in seq.x_cols:
        inputs[col] = layers.Input(batch_size=batch_size,
                                   shape=(None, *seq.shapes[col][2:]),
                                   name=col)
        previous.append(inputs[col])

    random.seed(43)
    level_count = hp.Int('lstm_layers', 2, 7)
    for level in range(level_count):
        depth = hp.Int(f'depth_{level}', 4, 64, sampling='log')
        fan_in = min(hp.Int(f'connections_{level}', 1, 3), len(previous))
        drop_rate = hp.Float(f'dropout_{level}', 0, 0.5)
        branch_count = hp.Int(f'width_{level}', 1, 16)

        branches = []
        for branch in range(branch_count):
            # Create the LSTM before the concatenation so that the name
            # generator and the random sampler are consumed in the same order
            # as before.
            lstm = layers.LSTM(depth,
                               return_sequences=True,
                               name=f'lstm{level:03}_{branch:03}_{next(names)}',
                               stateful=stateful)
            merged = forgiving_concatenate(random.sample(previous, fan_in),
                                           name=next(names))
            out = lstm(merged)
            out = layers.BatchNormalization(name=next(names))(out)
            out = layers.Dropout(drop_rate, name=next(names))(out)
            branches.append(out)
        previous = branches

    x = forgiving_concatenate(previous)

    outputs = {}
    loss = {}
    for col in seq.y_cols:
        if col in seq.categorical_cols:
            units = seq.shapes[col][-1]
            head = layers.TimeDistributed(layers.Dense(units, activation='softmax'), name=col)
            outputs[col] = head(x)
            # does not work well with mixed precision and stateful model
            loss[col] = keras.losses.CategoricalCrossentropy(
                label_smoothing=tf.cast(hp.Float('label_smoothing', 0.0, 0.7), 'float32'),
            )
        if col in seq.regression_cols:
            units = seq.shapes[col][-1]
            head = layers.TimeDistributed(layers.Dense(units, activation=None), name=col)
            outputs[col] = head(x)
            loss[col] = 'mse'

    if not stateful and config.training.AVS_proxy_ratio != 0:
        if use_avs_model:
            model = AVSModel(inputs=inputs, outputs=outputs, config=config)
        else:
            model = Model(inputs=inputs, outputs=outputs)

        lr_schedule = FlatCosAnnealSchedule(
            decay_start=len(seq) * 30,  # Give extra epochs to big batch_size
            initial_learning_rate=hp.Choice('initial_learning_rate', [3e-2, 1e-2, 8e-3, ]),
            decay_steps=len(seq) * 40,
            alpha=0.01,
        )
        # Ranger hyper params based on
        # https://github.com/fastai/imagenette/blob/master/2020-01-train.md
        opt = tfa.optimizers.Lookahead(
            tfa.optimizers.RectifiedAdam(learning_rate=lr_schedule,
                                         beta_1=0.95,
                                         beta_2=0.99,
                                         epsilon=1e-6),
            sync_period=6,
            slow_step_size=0.5,
        )
    else:
        if config.training.AVS_proxy_ratio == 0:
            logging.warning(f'Not using AVSModel with superior optimizer due to '
                            f'{config.training.AVS_proxy_ratio=}.')
        model = Model(inputs=inputs, outputs=outputs)
        opt = keras.optimizers.Adam()

    model.compile(
        optimizer=opt,
        loss=loss,
        metrics=metrics.create_metrics((not stateful), config),
    )
    return model
def _multi_kernel_conv(stream, filters, kernel_sizes, activation, names):
    """Run one causal Conv1D per kernel size over ``stream`` and concatenate the outputs."""
    return forgiving_concatenate(
        inputs=[
            layers.Conv1D(filters=filters,
                          kernel_size=int(size),
                          activation=activation,
                          padding='causal',
                          kernel_initializer='lecun_normal',
                          name=next(names))(stream)
            for size in kernel_sizes
        ],
        axis=-1,
        name=next(names),
    )


def _conv_stack(stream, repetitions, filters, kernel_sizes, activation, dropout, names):
    """Apply conv -> batch norm -> spatial dropout to ``stream`` ``repetitions`` times."""
    for _ in range(repetitions):
        stream = _multi_kernel_conv(stream, filters, kernel_sizes, activation, names)
        stream = layers.BatchNormalization(name=next(names))(stream)
        stream = layers.SpatialDropout1D(dropout)(stream)
    return stream


def build_model(hp: kt.HyperParameters, use_avs_model: bool = True):
    """Assemble and compile the per-column CNN -> LSTM -> CNN sequence model.

    Reads the module-level ``seq``, ``config`` and ``stateful`` objects. Each
    column in ``seq.x_cols`` gets its own input and convolution stack. The
    streams are concatenated, passed through the tuned LSTM layers and a final
    convolution stage, then fanned out into one time-distributed Dense output
    per column in ``seq.y_cols``.

    The kernel-size hyperparameters are strings. Every character of the
    chosen string becomes the kernel size of one parallel convolution, so
    ``'35'`` means kernel sizes 3 and 5.

    NOTE(review): the source layout was whitespace-collapsed. The batch-norm
    and dropout layers following each convolution are assumed to sit inside
    the repetition loops -- confirm against the original layout.

    Args:
        hp: Hyperparameter container supplied by the tuner.
        use_avs_model: On the non-stateful path with a non-zero
            ``AVS_proxy_ratio``, build an ``AVSModel`` (default) or a plain
            ``Model``. The flag was previously ignored; the default keeps the
            old behaviour.

    Returns:
        The compiled model.
    """
    batch_size = config.generation.batch_size if stateful else None
    layer_names = name_generator('layer')
    inputs = {}
    per_stream = {}

    cnn_activation = {'relu': keras.activations.relu,
                      'elu': keras.activations.elu,
                      'mish': tfa.activations.mish}[hp.Choice('cnn_activation', ['relu', 'mish'])]
    cat_cnn_repetition = hp.Int('cat_cnn_repetition', 0, 4)
    cnn_spatial_dropout = hp.Float('spatial_dropout', 0.0, 0.5)
    cat_cnn_filters = hp.Int('cat_cnn_filters', 64, 256, sampling='log')
    reg_cnn_repetition = hp.Int('reg_cnn_repetition', 0, 4)
    reg_cnn_filters = hp.Int('reg_cnn_filters', 64, 256, sampling='log')
    cnn_kernel_size = hp.Choice('cnn_kernel_size', ['1', '3', '35', '37', ])

    for col in seq.x_cols:
        if col in seq.categorical_cols:
            inputs[col] = layers.Input(batch_size=batch_size,
                                       shape=(None, *seq.shapes[col][2:]),
                                       name=col)
            per_stream[col] = _conv_stack(inputs[col], cat_cnn_repetition, cat_cnn_filters,
                                          cnn_kernel_size, cnn_activation,
                                          cnn_spatial_dropout, layer_names)
        if col in seq.regression_cols:
            inputs[col] = layers.Input(batch_size=batch_size,
                                       shape=(None, *seq.shapes[col][2:]),
                                       name=col)
            per_stream[col] = _conv_stack(inputs[col], reg_cnn_repetition, reg_cnn_filters,
                                          cnn_kernel_size, cnn_activation,
                                          cnn_spatial_dropout, layer_names)

    x = forgiving_concatenate(inputs=list(per_stream.values()),
                              axis=-1,
                              name=next(layer_names))

    lstm_repetition = hp.Int('lstm_repetition', 0, 4)
    lstm_dropout = hp.Float('lstm_dropout', 0.0, 0.6)
    lstm_l2_regularizer = hp.Choice('lstm_l2_regularizer', [1e-2, 1e-4, 1e-6, 0.0])
    for i in range(lstm_repetition):
        if i > 0:
            # Dropout only between LSTM layers, never before the first one.
            x = layers.Dropout(lstm_dropout)(x)
        x = layers.LSTM(hp.Int(f'lstm_{i}_units', 128, 384, sampling='log'),
                        return_sequences=True,
                        stateful=stateful,
                        name=next(layer_names),
                        kernel_regularizer=keras.regularizers.l2(lstm_l2_regularizer),
                        )(x)
        x = layers.BatchNormalization(name=next(layer_names))(x)

    end_cnn_repetition = hp.Int('end_cnn_repetition', 0, 2)
    end_spatial_dropout = hp.Float('end_spatial_dropout', 0.0, 0.5)
    end_cnn_filters = hp.Int('end_cnn_filters', 128, 384, sampling='log')
    end_cnn_kernel_size = hp.Choice('end_cnn_kernel_size', ['1', '3', ])
    for _ in range(end_cnn_repetition):
        x = layers.SpatialDropout1D(end_spatial_dropout)(x)
        x = _multi_kernel_conv(x, end_cnn_filters, end_cnn_kernel_size,
                               cnn_activation, layer_names)
        x = layers.BatchNormalization(name=next(layer_names))(x)
        x = layers.SpatialDropout1D(end_spatial_dropout)(x)

    outputs = {}
    loss = {}
    for col in seq.y_cols:
        if col in seq.categorical_cols:
            shape = seq.shapes[col][-1]
            outputs[col] = layers.TimeDistributed(layers.Dense(shape, activation='softmax'),
                                                  name=col)(x)
            # does not work well with mixed precision and stateful model
            loss[col] = keras.losses.CategoricalCrossentropy(
                label_smoothing=tf.cast(hp.Float('label_smoothing', 0.0, 0.6), 'float32'),
            )
        if col in seq.regression_cols:
            shape = seq.shapes[col][-1]
            outputs[col] = layers.TimeDistributed(layers.Dense(shape, activation=None),
                                                  name=col)(x)
            loss[col] = 'mse'

    if stateful or config.training.AVS_proxy_ratio == 0:
        if config.training.AVS_proxy_ratio == 0:
            logging.log(logging.WARNING,
                        f'Not using AVSModel with superior optimizer due to '
                        f'{config.training.AVS_proxy_ratio=}.')
        model = Model(inputs=inputs, outputs=outputs)
        opt = keras.optimizers.Adam()
    else:
        # Honour the caller's flag instead of always building an AVSModel.
        if use_avs_model:
            model = AVSModel(inputs=inputs, outputs=outputs, config=config)
        else:
            model = Model(inputs=inputs, outputs=outputs)

        decay_start_epoch = hp.Int('decay_start_epoch', 15, 40)
        decay_end_epoch = (decay_start_epoch * 4) // 3
        lr_schedule = FlatCosAnnealSchedule(
            decay_start=len(seq) * decay_start_epoch,  # Give extra epochs to big batch_size
            initial_learning_rate=hp.Choice('initial_learning_rate', [3e-2, 1e-2, 8e-3]),
            decay_steps=len(seq) * decay_end_epoch,
            alpha=0.001,
        )
        # Ranger hyper params based on
        # https://github.com/fastai/imagenette/blob/master/2020-01-train.md
        opt = tfa.optimizers.RectifiedAdam(learning_rate=lr_schedule,
                                           beta_1=0.95,
                                           beta_2=0.99,
                                           epsilon=1e-6)
        opt = tfa.optimizers.Lookahead(opt, sync_period=6, slow_step_size=0.5)

    model.compile(
        optimizer=opt,
        loss=loss,
        metrics=metrics.create_metrics((not stateful), config),
    )
    return model
def fit_sim_model(X_train,
                  X_test,
                  y_train,
                  y_test,
                  model1,
                  model2,
                  results_file='results.csv',
                  embedding_file='sim_embeddings',
                  num_runs=1,
                  hp_file1=None,
                  hp_file2=None,
                  hp_pred_file=None,
                  params=None):
    """Tune, train and evaluate the model that embeds two knowledge graphs.

    The two graphs are read from ``./data/chemicals0.csv`` and
    ``./data/taxonomy0.csv``. Their entities and relations are mapped to
    integer ids. Rows of ``X_train`` / ``X_test`` whose first or second
    element is not a known entity are dropped, together with the matching
    targets.

    Side effects: writes the best hyperparameters to ``./sim_hp/<name>.json``,
    four ``.npy`` files prefixed with ``embedding_file``, and a CSV of
    per-metric mean and variance to ``results_file``.

    Args:
        X_train, X_test: Iterables of 3-item rows: an entity of the first
            graph, an entity of the second graph, and a numeric value.
        y_train, y_test: Targets aligned with ``X_train`` / ``X_test``.
        model1, model2: Embedding model names for the two graphs.
        results_file: Destination CSV for the evaluation summary.
        embedding_file: Prefix for the saved embedding and id arrays.
        num_runs: Number of train-and-evaluate repetitions of the best model.
        hp_file1, hp_file2: Optional JSON files with fixed embedding
            hyperparameters. They are only used when both are given.
        hp_pred_file: Optional JSON file with fixed prediction-network
            hyperparameters. The part of its basename before the first ``_``
            names the hyperparameter dump. When omitted, the dump is named
            ``<model1>_<model2>``.
        params: Settings dict (``MAX_TRIALS``, ``SEARCH_MAX_EPOCHS``,
            ``MAX_EPOCHS``, ``PATIENCE``, ``NUM_FOLDS``, ``cw``). Defaults to
            the module-level ``PARAMS``.
    """
    params = params or PARAMS

    kg1 = pd.read_csv('./data/chemicals0.csv')
    kg2 = pd.read_csv('./data/taxonomy0.csv')
    kg1 = list(zip(kg1['subject'], kg1['predicate'], kg1['object']))
    kg2 = list(zip(kg2['subject'], kg2['predicate'], kg2['object']))

    entities1 = {s for s, _, _ in kg1} | {o for _, _, o in kg1}
    relations1 = {p for _, p, _ in kg1}
    entities2 = {s for s, _, _ in kg2} | {o for _, _, o in kg2}
    relations2 = {p for _, p, _ in kg2}

    me1 = {k: i for i, k in enumerate(entities1)}
    me2 = {k: i for i, k in enumerate(entities2)}
    mr1 = {k: i for i, k in enumerate(relations1)}
    mr2 = {k: i for i, k in enumerate(relations2)}
    kg1 = [(me1[s], mr1[p], me1[o]) for s, p, o in kg1]
    kg2 = [(me2[s], mr2[p], me2[o]) for s, p, o in kg2]

    output_dim = 1

    def _encode(X, y):
        """Map rows to entity ids, dropping rows (and targets) with unknown entities."""
        rows = [(me1[a], me2[b], float(x)) for a, b, x in X
                if a in entities1 and b in entities2]
        targets = [float(t) for t, row in zip(y, X)
                   if row[0] in entities1 and row[1] in entities2]
        return np.asarray(rows), np.asarray(targets)

    X_train, y_train = _encode(X_train, y_train)
    X_test, y_test = _encode(X_test, y_test)

    hp = HyperParameters()
    kg_lengths = list(map(len, [kg1, kg2]))
    output_lengths = len(X_train)

    hp.Fixed('num_entities1', len(entities1))
    hp.Fixed('num_entities2', len(entities2))
    hp.Fixed('num_relations1', len(relations1))
    hp.Fixed('num_relations2', len(relations2))
    hp.Fixed('embedding_model1', model1)
    hp.Fixed('embedding_model2', model2)
    hp.Fixed('output_dim', output_dim)

    bs = 1024
    if hp_file1 and hp_file2:
        for idx, hp_file in enumerate([hp_file1, hp_file2]):
            with open(hp_file, 'r') as fp:
                data = json.load(fp)
            for k in data:
                hp.Fixed(k + str(idx + 1), data[k])
                if k == 'batch_size':
                    bs = min(bs, data[k])
    else:
        for suffix, m in zip(['1', '2'], [model1, model2]):
            hp.Choice('dim' + suffix, [100, 200, 400], default=200)
            hp.Choice('negative_samples' + suffix, [10, 100], default=10)
            if m in ['ConvE', 'ConvR', 'ConvKB']:
                bs = 128
            hp.Choice('loss_function' + suffix, [
                'pairwize_hinge', 'pairwize_logistic', 'pointwize_hinge',
                'pointwize_logistic'
            ], default='pairwize_hinge')

    if hp_pred_file:
        with open(hp_pred_file, 'r') as fp:
            data = json.load(fp)
        for k in data:
            hp.Fixed(k, data[k])
    else:
        MAX_LAYERS = 3
        hp.Int('branching_num_layers_chemical', 0, MAX_LAYERS, default=1)
        hp.Int('branching_num_layers_species', 0, MAX_LAYERS, default=1)
        hp.Int('branching_num_layers_conc', 0, MAX_LAYERS, default=1)
        hp.Int('num_layers1', 0, 3, default=1)
        for layer in range(MAX_LAYERS + 1):
            hp.Choice('branching_units_chemical_' + str(layer + 1), [32, 128, 512], default=128)
            hp.Choice('branching_units_species_' + str(layer + 1), [32, 128, 512], default=128)
            hp.Choice('branching_units_conc_' + str(layer + 1), [32, 128, 512], default=128)
            hp.Choice('units_' + str(layer + 1), [32, 128, 512], default=128)

    # NOTE(review): the source layout was whitespace-collapsed. The
    # declarations below are assumed to run regardless of `hp_pred_file`
    # -- confirm against the original layout.
    # Since inputs are oversampled, we must reduce the weight of losses accordingly.
    w = output_lengths / max(kg_lengths)
    hp.Float('loss_weight1', w, 5 * w, sampling='log')
    hp.Float('loss_weight2', w, 5 * w, sampling='log')
    hp.Float('classification_loss_weight', w, 5 * w, sampling='log')
    hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4, 1e-5])
    hp.Fixed('batch_size', bs)

    # Pad the training inputs to a common length that is a multiple of the
    # batch size. A full extra batch is added when the longest input is
    # already a multiple.
    longest = max(map(len, [kg1, kg2, X_train]))
    m = longest + (bs - longest % bs)

    Xtr, ytr = prep_data_v2(kg1, kg2, X_train, y_train, max_length=m)
    Xte, yte = prep_data_v2(kg1, kg2, X_test, y_test, test=True,
                            max_length=max(bs, len(y_test)))

    tuner = CVTuner(hypermodel=build_model,
                    oracle=kt.oracles.BayesianOptimization(
                        hyperparameters=hp,
                        objective=Objective('val_auc', 'max'),
                        max_trials=params['MAX_TRIALS']),
                    overwrite=True,
                    project_name='tmp/' + ''.join(
                        random.choice(string.ascii_uppercase + string.digits)
                        for _ in range(11)))

    tuner.search(Xtr,
                 ytr,
                 epochs=params['SEARCH_MAX_EPOCHS'],
                 batch_size=bs,
                 callbacks=[
                     EarlyStopping('loss', mode='min', patience=params['PATIENCE'])
                 ],
                 kfolds=params['NUM_FOLDS'],
                 class_weight=params['cw'])

    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
    model = tuner.hypermodel.build(best_hps)

    # `hp_pred_file` defaults to None; fall back to a name built from the two
    # model names instead of failing on `None.split` after the search is done.
    if hp_pred_file:
        dump_name = hp_pred_file.split('/')[-1].split('_')[0]
    else:
        dump_name = '%s_%s' % (model1, model2)
    with open('./sim_hp/%s.json' % dump_name, 'w') as fp:
        json.dump(dict(best_hps.values), fp)

    results = []
    for _ in range(num_runs):
        reset_weights(model)
        model.fit(Xtr,
                  ytr,
                  epochs=params['MAX_EPOCHS'],
                  batch_size=bs,
                  verbose=2,
                  class_weight=params['cw'],
                  callbacks=[
                      EarlyStopping('loss', mode='min', patience=params['PATIENCE'])
                  ])
        results.append(model.evaluate(Xte, yte, verbose=0, batch_size=bs))

    W1 = model.get_layer('embedding').get_weights()[0]
    W2 = model.get_layer('embedding_2').get_weights()[0]
    np.save(embedding_file + '_chemical_embeddings.npy', W1)
    # Materialise the (entity, id) pairs: np.asarray on a bare zip iterator
    # wraps the iterator object instead of building a table.
    np.save(embedding_file + '_chemical_ids.npy', np.asarray(list(me1.items())))
    np.save(embedding_file + '_taxonomy_embeddings.npy', W2)
    np.save(embedding_file + '_taxonomy_ids.npy', np.asarray(list(me2.items())))

    var = np.var(np.asarray(results), axis=0)
    results = np.mean(np.asarray(results), axis=0)

    df = pd.DataFrame(
        data={
            'metric': model.metrics_names,
            'value': list(results),
            'variance': list(var)
        })
    df.to_csv(results_file)
def get_tunable_roberta(hp: HyperParameters):
    """Build a two-headed model on top of a frozen pretrained RoBERTa.

    Two parallel branches (``'1'`` and ``'2'``) start from the first element
    of the RoBERTa output. Each passes through dropout and a tuned stack of
    Conv1D + LeakyReLU layers, and ends in a softmax named ``'sts'`` or
    ``'ets'`` respectively. The recurrent variant and alpha dropout are
    currently switched off by the local flags below.

    Args:
        hp: Hyperparameter container supplied by the tuner.

    Returns:
        The compiled Keras model with inputs ``ids``, ``att`` and ``tti``.
    """
    ids = keras.layers.Input(shape=(Config.Train.max_len, ), dtype=tf.int32, name='ids')
    att = keras.layers.Input(shape=(Config.Train.max_len, ), dtype=tf.int32, name='att')
    tok_type_ids = keras.layers.Input(shape=(Config.Train.max_len, ), dtype=tf.int32, name='tti')

    config = RobertaConfig.from_pretrained(Config.Roberta.config)
    roberta_model = TFRobertaModel.from_pretrained(Config.Roberta.model, config=config)
    roberta_model.trainable = False
    x = roberta_model(ids, attention_mask=att, token_type_ids=tok_type_ids)

    # Both branches are driven by one loop over their tag. The statements run
    # in the order branch 1 then branch 2, so layers and hyperparameters are
    # created in the same sequence as in the unrolled form.
    tags = ('1', '2')
    branch = {}

    use_alpha_dropout = False  # hp.Boolean('use_alpha_dropout')
    dropout_cls = keras.layers.AlphaDropout if use_alpha_dropout else keras.layers.Dropout
    for tag in tags:
        branch[tag] = dropout_cls(hp.Choice(f'dropout{tag}', [0.1, 0.2, 0.3]))(x[0])

    use_rnn = False  # hp.Boolean('use_rnn')
    if use_rnn:
        lstm_count = hp.Choice('rnn_count', [1, 2])
        for i in range(lstm_count):
            for tag in tags:
                recurrent = keras.layers.Bidirectional(
                    keras.layers.LSTM(hp.Int(f'lstm_units{tag}_{i}', 32, 48, step=8),
                                      return_sequences=True,
                                      return_state=True))
                seq_out, state_0, _, state_1, _ = recurrent(branch[tag])
                seq_out = keras.layers.LeakyReLU()(seq_out)
                state = keras.layers.concatenate([state_0, state_1])
                branch[tag] = keras.layers.Attention()([seq_out, state])
    else:
        conv_count = hp.Choice('conv_count', [1, 2])
        for i in range(conv_count):
            for tag in tags:
                conv = keras.layers.Conv1D(hp.Int(f'conv_filter{tag}_{i}', 8, 24, step=8),
                                           hp.Int(f'conv_kernel{tag}_{i}', 3, 5, step=1),
                                           padding='same')
                branch[tag] = keras.layers.LeakyReLU()(conv(branch[tag]))

    # Collapse each branch to one score per position and normalise it.
    heads = []
    for tag, head_name in zip(tags, ('sts', 'ets')):
        scores = keras.layers.Conv1D(1, 1)(branch[tag])
        scores = keras.layers.Flatten()(scores)
        heads.append(keras.layers.Activation('softmax', name=head_name)(scores))
    x1, x2 = heads

    model = keras.models.Model(inputs=[ids, att, tok_type_ids], outputs=[x1, x2])

    optimizer = keras.optimizers.Adam(learning_rate=1e-3)
    loss = keras.losses.CategoricalCrossentropy(
        label_smoothing=Config.Train.label_smoothing)
    model.compile(loss=loss, optimizer=optimizer)
    return model
def optimize_model(model, kg1, kg2):
    """Tune and train one embedding model per knowledge graph.

    For each of the two graphs a Bayesian search is run over the embedding
    hyperparameters. The best configuration is written to
    ``./pretrained_hp/<model><suffix>_kg.json``, the model is rebuilt and
    trained, and its entity-embedding weights are collected.

    Args:
        model: Name of the embedding model (e.g. ``'TransE'``, ``'ConvE'``).
        kg1: Triples of the first graph (saved under suffix ``_chemical``).
        kg2: Triples of the second graph (saved under suffix ``_taxonomy``).

    Returns:
        Dict mapping ``'_chemical'`` / ``'_taxonomy'`` to the first weight
        array of the trained model's ``entity_embedding`` layer.
    """
    bs = 256
    kg1 = np.asarray(pad(kg1, bs))
    kg2 = np.asarray(pad(kg2, bs))

    embeddings = {}

    # Keep the architecture name in its own variable and never rebind it.
    # The trained Keras model used to be assigned to `model`, which made the
    # convolutional check below fail for the second graph.
    model_name = model
    is_convolutional = model_name in ['ConvE', 'ConvR', 'ConvKB']

    for kg, name in zip([kg1, kg2], ['_chemical', '_taxonomy']):
        N = len({s for s, _, _ in kg} | {o for _, _, o in kg})
        M = len({p for _, p, _ in kg})

        hp = HyperParameters()
        hp.Fixed('embedding_model', model_name)
        hp.Fixed('num_entities', value=N)
        hp.Fixed('num_relations', value=M)

        lfs = [
            'pairwize_hinge', 'pairwize_logistic', 'pointwize_hinge',
            'pointwize_logistic'
        ]

        hp.Int('margin', 1, 10, default=1)
        hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4, 1e-5])

        if is_convolutional:
            batch_size = 128
            hp.Fixed('hidden_dp', 0.2)
        else:
            batch_size = bs

        # NOTE(review): the source layout was whitespace-collapsed; this
        # choice is assumed to apply to every model rather than only to the
        # non-convolutional branch -- confirm.
        hp.Choice('regularization', [0.0, 0.01, 0.001], default=0.0)
        if model_name in ['TransE', 'HAKE', 'pRotatE', 'RotatE']:
            hp.Int('gamma', 0, 20, default=0)
        hp.Choice('loss_function', lfs)
        hp.Fixed('dp', 0.2)
        hp.Choice('dim', [100, 200, 400], default=200)
        hp.Choice('negative_samples', [10, 100], default=10)
        hp.Fixed('batch_size', batch_size)

        tuner = BayesianOptimization(
            build_model,
            hyperparameters=hp,
            objective=Objective('relative_loss', 'min'),
            max_trials=MAX_TRIALS,
            overwrite=True,
            project_name='tmp/' + ''.join(
                random.choice(string.ascii_uppercase + string.digits)
                for _ in range(11)))

        tuner.search(kg,
                     epochs=SEARCH_MAX_EPOCHS,
                     batch_size=batch_size,
                     callbacks=[
                         ClearTrainingOutput(),
                         MyCallback(kg),
                         TerminateOnNaN(),
                         TimeStopping(SECONDS_PER_TRAIL),
                         EarlyStopping('loss', min_delta=1e-5, patience=3)
                     ],
                     verbose=1)

        tuner.results_summary()

        best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
        trained = tuner.hypermodel.build(best_hps)

        with open('./pretrained_hp/%s%s_kg.json' % (model_name, name), 'w') as fp:
            json.dump(dict(best_hps.values), fp)

        trained.fit(kg,
                    epochs=MAX_EPOCHS,
                    batch_size=batch_size,
                    callbacks=[
                        EarlyStopping('loss', min_delta=1e-5, patience=3),
                        ReduceLROnPlateau('loss', min_delta=1e-5, patience=3)
                    ])

        embeddings[name] = trained.entity_embedding.get_weights()[0]

    return embeddings
def build(self, hp: kerastuner.HyperParameters) -> keras.Model:
    """Build DAN model

    Notes:
        This is normally called within a HyperModel context.

    Args:
        hp (:obj:`HyperParameters`): `HyperParameters` instance

    Returns:
        A built/compiled keras model ready for hyperparameter tuning
    """
    # Candidate strengths shared by the L1 and L2 penalties
    penalties = [0.0, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1]

    # --- Search space
    l1_strength = hp.Choice("Feature Embedding L1", penalties, default=0.0)
    l2_strength = hp.Choice("Feature Embedding L2", penalties, default=0.0)
    embedding_dim = hp.Int("Embedding Dimension",
                           min_value=64,
                           max_value=2048,
                           default=1024,
                           step=64)
    embedding_drop = hp.Float("Dropout from Embeddings",
                              min_value=0.0,
                              max_value=0.9,
                              step=0.05,
                              default=0.0)
    head_drop = hp.Float("Dropout before prediction",
                         min_value=0.0,
                         max_value=0.9,
                         step=0.05,
                         default=0.5)
    hidden_units = hp.Int("Dense Units",
                          min_value=2,
                          max_value=128,
                          sampling="log",
                          default=14)

    # --- Network
    multiclass = self.n_classes > 2

    feat_input = keras.Input(shape=(self.input_size, ))
    embedded = keras.layers.Embedding(
        input_dim=self.vocab_size,
        output_dim=embedding_dim,
        embeddings_regularizer=keras.regularizers.l1_l2(l1_strength, l2_strength),
        mask_zero=True,
        name="Feature_Embeddings")(feat_input)
    embedded = keras.layers.Dropout(rate=embedding_drop)(embedded)

    # Average the embeddings over axis 1
    pooled = keras.backend.mean(embedded, 1)

    hidden = keras.layers.Dense(hidden_units, activation="relu", name='dense_1')(pooled)
    hidden = keras.layers.Dropout(head_drop)(hidden)

    output = keras.layers.Dense(
        units=self.n_classes if multiclass else 1,
        activation="softmax" if multiclass else "sigmoid",
        name="Output")(hidden)

    model = keras.Model(feat_input, output)

    # --- Optimizer
    # NOTE: I've had a lot of issues with SGD getting even comparable performance to Adam
    # so I'm saying we scrap it and just go with Adam.
    opt = keras.optimizers.Adam()

    # --- Loss FN
    # NOTE: I was messing around with focal loss here, but I think that's
    # harder to justify and explain in this context
    if self.loss is not None:
        loss_fn = self.loss
    elif multiclass:
        loss_fn = keras.losses.categorical_crossentropy
    else:
        loss_fn = keras.losses.binary_crossentropy

    model.compile(optimizer=opt, loss=loss_fn, metrics=self.metrics)
    return model
def build(self, hp: kerastuner.HyperParameters) -> keras.Model:
    """Build LSTM model

    Notes:
        This is normally called within a HyperModel context.

    Args:
        hp (:obj:`HyperParameters`): `HyperParameters` instance

    Returns:
        A built/compiled keras model ready for hyperparameter tuning
    """
    # Candidate strengths shared by every L1/L2 penalty
    penalties = [0.0, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1]

    # --- Search space
    # Should we multiply the feature embeddings by their averages?
    use_weighting = hp.Boolean("Feature Weighting")
    # Should we add a dense layer between RNN and output?
    use_final_dense = hp.Boolean("Final Dense Layer")

    feature_l1 = hp.Choice("Feature Embedding L1", penalties)
    feature_l2 = hp.Choice("Feature Embedding L2", penalties)
    feature_dim = hp.Int("Embedding Dimension",
                         min_value=64,
                         max_value=512,
                         default=64,
                         step=64)

    demog_dim = hp.Int("Demographics Embedding Dimension",
                       min_value=1,
                       max_value=64,
                       default=self.n_demog)

    # Declared as children of "Feature Weighting" with parent value True
    weight_l1 = hp.Choice("Average Embedding L1",
                          penalties,
                          parent_name="Feature Weighting",
                          parent_values=[True])
    weight_l2 = hp.Choice("Average Embedding L2",
                          penalties,
                          parent_name="Feature Weighting",
                          parent_values=[True])

    recurrent_units = hp.Int("LSTM Units",
                             min_value=32,
                             max_value=512,
                             default=32,
                             step=32)
    input_drop = hp.Float("LSTM Dropout",
                          min_value=0.0,
                          max_value=0.9,
                          default=0.4,
                          step=0.01)
    recurrent_drop = hp.Float("LSTM Recurrent Dropout",
                              min_value=0.0,
                              max_value=0.9,
                              default=0.4,
                              step=0.01)
    recurrent_l1 = hp.Choice("LSTM weights L1", penalties)
    recurrent_l2 = hp.Choice("LSTM weights L2", penalties)

    # Declared as a child of "Final Dense Layer" with parent value True
    hidden_units = hp.Int("Dense Units",
                          min_value=2,
                          max_value=128,
                          sampling="log",
                          parent_name="Final Dense Layer",
                          parent_values=[True])

    # --- Network
    feat_input = keras.Input(shape=(None, None), ragged=True)
    demog_input = keras.Input(shape=(self.n_demog_bags, ))

    demog_vector = keras.layers.Embedding(
        self.n_demog,
        output_dim=demog_dim,
        mask_zero=True,
        name="Demographic_Embeddings")(demog_input)
    demog_vector = keras.layers.Flatten()(demog_vector)

    to_average = keras.layers.Embedding(
        self.vocab_size,
        output_dim=feature_dim,
        embeddings_regularizer=keras.regularizers.l1_l2(feature_l1, feature_l2),
        mask_zero=True,
        name="Feature_Embeddings")(feat_input)

    if use_weighting:
        code_weights = keras.layers.Embedding(
            self.vocab_size,
            output_dim=1,
            embeddings_regularizer=keras.regularizers.l1_l2(weight_l1, weight_l2),
            mask_zero=True,
            name="Average_Embeddings")(feat_input)
        # Multiplying the code embeddings by their respective weights
        to_average = keras.layers.Multiply(name="Embeddings_by_Average")(
            [to_average, code_weights])

    averaged = keras.layers.Lambda(lambda x: tf.math.reduce_mean(x, axis=2),
                                   name="Averaging")(to_average)

    features = keras.layers.LSTM(
        recurrent_units,
        dropout=input_drop,
        recurrent_dropout=recurrent_drop,
        recurrent_regularizer=keras.regularizers.l1_l2(recurrent_l1, recurrent_l2),
        name="Recurrent")(averaged)
    features = keras.layers.Concatenate()([features, demog_vector])

    if use_final_dense:
        features = keras.layers.Dense(hidden_units,
                                      activation="relu",
                                      name="pre_output")(features)

    multiclass = self.n_classes > 2
    output = keras.layers.Dense(
        self.n_classes if multiclass else 1,
        activation="softmax" if multiclass else "sigmoid",
        name="Output")(features)

    model = keras.Model([feat_input, demog_input], output)

    # --- Optimizer
    opt = keras.optimizers.Adam()

    # --- Loss FN
    # NOTE: I was messing around with focal loss here, but I think that's
    # harder to justify and explain in this context
    if self.loss is not None:
        loss_fn = self.loss
    elif multiclass:
        loss_fn = keras.losses.categorical_crossentropy
    else:
        loss_fn = keras.losses.binary_crossentropy

    model.compile(optimizer=opt, loss=loss_fn, metrics=self.metrics)
    return model