def build_activation_functions(hp: HyperParameters,
                               restricted_hyperparameter_search: bool):
    """Helper method for setting activation functions."""
    if restricted_hyperparameter_search:
        hp.Fixed("hidden_layer_activation", "relu")
    else:
        hp.Choice("hidden_layer_activation", ["relu", "elu", "selu"])
    hp.Fixed("output_layer_activation", "sigmoid")
    return hp
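# A minimal usage sketch (assumed, not from the source): a restricted search
# pins the hidden activation to "relu"; otherwise the tuner chooses among
# "relu", "elu" and "selu".
hp = HyperParameters()
hp = build_activation_functions(hp, restricted_hyperparameter_search=True)
assert hp.get("hidden_layer_activation") == "relu"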
def build_hyper_mmoe(hp: HyperParameters, n_tasks: int,
                     all_columns: List[str], cat_features_dim: Dict[str, int],
                     val_data: Tuple, output_layer_activation: str):
    """
    Build Multi-Gate Mixture of Experts.

    Parameters
    ----------
    hp: instance of HyperParameters
        Hyper-parameters that define architecture and training of neural
        networks
    n_tasks: int
        Number of tasks (one tower and one output per task)
    all_columns: List[str]
        Names of all input columns
    cat_features_dim: Dict[str, int]
        Cardinality of each categorical feature
    val_data: Tuple
        Validation data; pass it to `fit`, not `compile`
    output_layer_activation: str
        Activation of the task output layers

    Returns
    -------
    model: compiled Keras Model
    """
    # Registered on hp; consumed by the builder helpers below.
    hidden_layer_activation = hp.Choice("hidden_layer_activation",
                                        ["elu", "relu", "selu"])
    output_layer_activation = hp.Fixed("output_layer_activation",
                                       output_layer_activation)
    experts = build_experts(hp)
    task_towers = build_task_towers(hp, n_tasks)
    preprocessing_layer = build_preprocessing_layer_uci_income(
        hp, all_columns, cat_features_dim)
    mmoe = MultiGateMixtureOfExperts(experts,
                                     task_towers,
                                     base_layer=preprocessing_layer)
    input_layer = Input(shape=(len(all_columns), ))
    output_layer = mmoe(input_layer)
    model = Model(inputs=input_layer, outputs=output_layer)
    # `validation_data` is a `fit` argument, not a `compile` argument,
    # so it is not passed here.
    model.compile(loss=['binary_crossentropy', 'binary_crossentropy'],
                  optimizer=build_optimizer(hp),
                  metrics=[tf.keras.metrics.AUC()])
    return model
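# A hedged usage sketch (not from the source): the extra arguments of
# build_hyper_mmoe are bound with functools.partial so the tuner sees the
# usual `hp -> model` signature. X_train, y1, y2 and val_data are
# hypothetical placeholders; kt is assumed to be the keras-tuner package.
from functools import partial

build_fn = partial(build_hyper_mmoe,
                   n_tasks=2,
                   all_columns=all_columns,
                   cat_features_dim=cat_features_dim,
                   val_data=val_data,
                   output_layer_activation='sigmoid')
tuner = kt.RandomSearch(build_fn, objective='val_loss', max_trials=10)
tuner.search(X_train, [y1, y2], validation_data=val_data, epochs=5)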
def init_hyperparameters():
    """
    Initializes the fixed hyperparameters for the model.
    :return: a HyperParameters instance with all values fixed
    """
    hp = HyperParameters()
    hp.Fixed("duplicate convolutional layers", 8)
    hp.Fixed("End Layers", 20)
    hp.Fixed("Vertical Convolution", 3)
    hp.Fixed("Horizontal Convolution", 3)
    hp.Fixed("MSE Lambda", 70)
    hp.Fixed("positive case Lambda", 70)
    return hp
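# A minimal usage sketch (hypothetical builder, not from the source): fixed
# values are read back inside a model-building function by the exact
# registration string, including spaces.
def build_from_fixed(hp):
    n_blocks = hp.get("duplicate convolutional layers")  # -> 8
    mse_lambda = hp.get("MSE Lambda")                    # -> 70
    return n_blocks, mse_lambda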
# best 1: 0.34% misclassification
# hp.Fixed('weight_init', value='GlorotUniform')
# hp.Fixed('base_lr', value=0.21145)
# hp.Fixed('decay_steps', value=7185)
# hp.Fixed('decay_rate', value=0.115)
# hp.Fixed('lr_momentum', value=0.91074)

# best 2
# hp.Fixed('weight_init', value='HeUniform')
# hp.Fixed('base_lr', value=0.27872)
# hp.Fixed('decay_steps', value=3805)
# hp.Fixed('decay_rate', value=0.44912)
# hp.Fixed('lr_momentum', value=0.93493)

# manual tune: 0.30%-0.32% misclassification
hp.Fixed('weight_init', value='HeUniform')
hp.Fixed('base_lr', value=0.1)
# hp.Fixed('decay_steps', value=2500)
# hp.Fixed('decay_rate', value=0.1)
hp.Fixed('lr_momentum', value=0.9)

f0 = hp.Fixed('filters0', value=30)
f1 = hp.Fixed('filters1', value=40)
f2 = hp.Fixed('filters2', value=50)
f3 = hp.Fixed('filters3', value=58)
f4 = hp.Fixed('filters4', value=70)
f5 = hp.Fixed('filters5', value=90)
# f0 = hp.Fixed('filters0', value=66)
# f1 = hp.Fixed('filters1', value=64)
# f2 = hp.Fixed('filters2', value=96)
def fit_sim_model(X_train,
                  X_test,
                  y_train,
                  y_test,
                  model1,
                  model2,
                  results_file='results.csv',
                  embedding_file='sim_embeddings',
                  num_runs=1,
                  hp_file1=None,
                  hp_file2=None,
                  hp_pred_file=None,
                  params=None):
    params = params or PARAMS

    kg1 = pd.read_csv('./data/chemicals0.csv')
    kg2 = pd.read_csv('./data/taxonomy0.csv')
    kg1 = list(zip(kg1['subject'], kg1['predicate'], kg1['object']))
    kg2 = list(zip(kg2['subject'], kg2['predicate'], kg2['object']))

    entities1 = {s for s, p, o in kg1} | {o for s, p, o in kg1}
    relations1 = {p for s, p, o in kg1}
    entities2 = {s for s, p, o in kg2} | {o for s, p, o in kg2}
    relations2 = {p for s, p, o in kg2}

    me1 = {k: i for i, k in enumerate(entities1)}
    me2 = {k: i for i, k in enumerate(entities2)}
    mr1 = {k: i for i, k in enumerate(relations1)}
    mr2 = {k: i for i, k in enumerate(relations2)}

    kg1 = [(me1[s], mr1[p], me1[o]) for s, p, o in kg1]
    kg2 = [(me2[s], mr2[p], me2[o]) for s, p, o in kg2]

    output_dim = 1

    # Both comprehensions on each right-hand side see the original arrays,
    # since the tuple is built before the names are rebound.
    X_train, y_train = np.asarray([
        (me1[a], me2[b], float(x)) for a, b, x in X_train
        if a in entities1 and b in entities2
    ]), np.asarray([
        float(x) for x, a in zip(y_train, X_train)
        if a[0] in entities1 and a[1] in entities2
    ])
    X_test, y_test = np.asarray([
        (me1[a], me2[b], float(x)) for a, b, x in X_test
        if a in entities1 and b in entities2
    ]), np.asarray([
        float(x) for x, a in zip(y_test, X_test)
        if a[0] in entities1 and a[1] in entities2
    ])

    scores = []
    k_best_predictions = []
    hp = HyperParameters()
    kg_lengths = list(map(len, [kg1, kg2]))
    output_lengths = len(X_train)

    hp.Fixed('num_entities1', len(entities1))
    hp.Fixed('num_entities2', len(entities2))
    hp.Fixed('num_relations1', len(relations1))
    hp.Fixed('num_relations2', len(relations2))
    hp.Fixed('embedding_model1', model1)
    hp.Fixed('embedding_model2', model2)
    hp.Fixed('output_dim', output_dim)

    bs = 1024
    if hp_file1 and hp_file2:
        # Reuse previously tuned embedding hyper-parameters.
        for i, hp_file in enumerate([hp_file1, hp_file2]):
            with open(hp_file, 'r') as fp:
                data = json.load(fp)
            for k in data:
                hp.Fixed(k + str(i + 1), data[k])
                if k == 'batch_size':
                    bs = min(bs, data[k])
    else:
        for i, m in zip(['1', '2'], [model1, model2]):
            hp.Choice('dim' + i, [100, 200, 400], default=200)
            hp.Choice('negative_samples' + i, [10, 100], default=10)
            if m in ['ConvE', 'ConvR', 'ConvKB']:
                bs = 128
            hp.Choice('loss_function' + i, [
                'pairwize_hinge', 'pairwize_logistic', 'pointwize_hinge',
                'pointwize_logistic'
            ],
                      default='pairwize_hinge')
            # Relative KG size; computed but currently unused here.
            w = kg_lengths[int(i) - 1] / max(kg_lengths)

    if hp_pred_file:
        with open(hp_pred_file, 'r') as fp:
            data = json.load(fp)
        for k in data:
            hp.Fixed(k, data[k])
    else:
        MAX_LAYERS = 3
        hp.Int('branching_num_layers_chemical', 0, MAX_LAYERS, default=1)
        hp.Int('branching_num_layers_species', 0, MAX_LAYERS, default=1)
        hp.Int('branching_num_layers_conc', 0, MAX_LAYERS, default=1)
        hp.Int('num_layers1', 0, 3, default=1)
        for i in range(MAX_LAYERS + 1):
            hp.Choice('branching_units_chemical_' + str(i + 1),
                      [32, 128, 512],
                      default=128)
            hp.Choice('branching_units_species_' + str(i + 1),
                      [32, 128, 512],
                      default=128)
            hp.Choice('branching_units_conc_' + str(i + 1), [32, 128, 512],
                      default=128)
            hp.Choice('units_' + str(i + 1), [32, 128, 512], default=128)

    # Since inputs are oversampled, reduce the weight of the losses
    # accordingly.
    w = output_lengths / max(kg_lengths)
    hp.Float('loss_weight1', w, 5 * w, sampling='log')
    hp.Float('loss_weight2', w, 5 * w, sampling='log')
    hp.Float('classification_loss_weight', w, 5 * w, sampling='log')

    hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4, 1e-5])
    hp.Fixed('batch_size', bs)

    # Pad all inputs up to a common multiple of the batch size.
    longest = max(map(len, [kg1, kg2, X_train]))
    m = longest + (bs - longest % bs)

    Xtr, ytr = prep_data_v2(kg1, kg2, X_train, y_train, max_length=m)
    Xte, yte = prep_data_v2(kg1,
                            kg2,
                            X_test,
                            y_test,
                            test=True,
                            max_length=max(bs, len(y_test)))

    tuner = CVTuner(hypermodel=build_model,
                    oracle=kt.oracles.BayesianOptimization(
                        hyperparameters=hp,
                        objective=Objective('val_auc', 'max'),
                        max_trials=params['MAX_TRIALS']),
                    overwrite=True,
                    project_name='tmp/' + ''.join(
                        random.choice(string.ascii_uppercase + string.digits)
                        for _ in range(11)))

    tuner.search(Xtr,
                 ytr,
                 epochs=params['SEARCH_MAX_EPOCHS'],
                 batch_size=bs,
                 callbacks=[
                     EarlyStopping('loss',
                                   mode='min',
                                   patience=params['PATIENCE'])
                 ],
                 kfolds=params['NUM_FOLDS'],
                 class_weight=params['cw'])

    results = []
    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
    model = tuner.hypermodel.build(best_hps)

    if hp_pred_file:
        # Persist the winning hyper-parameters. The output name is derived
        # from hp_pred_file, so this is skipped when no file was given
        # (the unguarded original crashed on hp_pred_file=None).
        out = dict(best_hps.values)
        name = hp_pred_file.split('/')[-1].split('_')[0]
        with open('./sim_hp/%s.json' % name, 'w') as fp:
            json.dump(out, fp)

    for _ in range(num_runs):
        reset_weights(model)
        model.fit(Xtr,
                  ytr,
                  epochs=params['MAX_EPOCHS'],
                  batch_size=bs,
                  verbose=2,
                  class_weight=params['cw'],
                  callbacks=[
                      EarlyStopping('loss',
                                    mode='min',
                                    patience=params['PATIENCE'])
                  ])
        r = model.evaluate(Xte, yte, verbose=0, batch_size=bs)
        results.append(r)

    W1 = model.get_layer('embedding').get_weights()[0]
    W2 = model.get_layer('embedding_2').get_weights()[0]
    np.save(embedding_file + '_chemical_embeddings.npy', W1)
    # zip objects must be materialised as lists before saving with NumPy.
    np.save(embedding_file + '_chemical_ids.npy',
            np.asarray(list(zip(entities1, range(len(entities1))))))
    np.save(embedding_file + '_taxonomy_embeddings.npy', W2)
    np.save(embedding_file + '_taxonomy_ids.npy',
            np.asarray(list(zip(entities2, range(len(entities2))))))

    var = np.var(np.asarray(results), axis=0)
    results = np.mean(np.asarray(results), axis=0)
    df = pd.DataFrame(data={
        'metric': model.metrics_names,
        'value': list(results),
        'variance': list(var)
    })
    df.to_csv(results_file)
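# A hedged usage sketch (not from the source): X_* rows are
# (chemical, species, concentration) triples keyed by the entity labels in
# the two KGs, y_* are binary labels, and PARAMS supplies the search budget.
fit_sim_model(X_train, X_test, y_train, y_test,
              model1='TransE', model2='RotatE',
              results_file='results.csv',
              embedding_file='sim_embeddings',
              num_runs=3)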
def tune(cfg):
    # =========
    # Configure
    # =========
    cfg = yaml.full_load(open(cfg))

    # Go one level deep: the top-level key is the algorithm name.
    algName = next(iter(cfg))
    cfg = cfg[algName]

    # ======
    # Logger
    # ======
    logger = get_logger('Tune', 'INFO')

    # =======
    # Dataset
    # =======
    lmdb_dir = cfg['lmdb_dir']
    length = 4000
    train = 2000
    split = length - train
    s = np.arange(0, length)
    np.random.shuffle(s)

    # *** hardcoded shapes ***
    # Note: np.long is an alias of int that was removed in NumPy 1.24;
    # np.int64 is the safe replacement there.
    y = list(
        islice(decaymode_generator(lmdb_dir, "Label", (), np.long), length))
    X_1 = list(
        islice(decaymode_generator(lmdb_dir, "ChargedPFO", (3, 6),
                                   np.float32), length))
    X_2 = list(
        islice(decaymode_generator(lmdb_dir, "NeutralPFO", (8, 21),
                                   np.float32), length))
    X_3 = list(
        islice(decaymode_generator(lmdb_dir, "ShotPFO", (6, 6), np.float32),
               length))
    X_4 = list(
        islice(decaymode_generator(lmdb_dir, "ConvTrack", (4, 6), np.float32),
               length))

    y = np.asarray(y)[s]
    X_1, X_2, X_3, X_4 = (np.asarray(X_1)[s], np.asarray(X_2)[s],
                          np.asarray(X_3)[s], np.asarray(X_4)[s])

    y_train = y[:-split]
    X_train_1, X_train_2, X_train_3, X_train_4 = (X_1[:-split], X_2[:-split],
                                                  X_3[:-split], X_4[:-split])
    y_valid = y[-split:]
    X_valid_1, X_valid_2, X_valid_3, X_valid_4 = (X_1[-split:], X_2[-split:],
                                                  X_3[-split:], X_4[-split:])

    # =====
    # Model
    # =====
    # Build the architecture, then print a summary to the console.
    model_ftn = partial(getattr(ModelModule, cfg['model']), cfg['arch'])
    model = model_ftn()
    # summary() prints and returns None, so route it through the logger.
    model.summary(print_fn=logger.info)

    hp = HyperParameters()
    hp.Fixed("n_layers_tdd_default", 3)
    hp.Fixed("n_layers_fc_default", 3)

    tuner = RandomSearch(
        getattr(ModelModule, cfg['tune_model']),
        hyperparameters=hp,
        tune_new_entries=True,
        objective='val_loss',
        max_trials=20,
        executions_per_trial=2,
        directory=os.path.join(cfg['save_dir'], cfg['tune']),
        project_name=cfg['tune'],
        distribution_strategy=tf.distribute.MirroredStrategy(),
    )
    logger.info('Search space summary:')
    tuner.search_space_summary()

    logger.info('Now searching ...')
    tuner.search([X_train_1, X_train_2, X_train_3, X_train_4],
                 y_train,
                 steps_per_epoch=int(train / 200),
                 epochs=20,
                 validation_steps=int(split / 200),
                 validation_data=([X_valid_1, X_valid_2, X_valid_3,
                                   X_valid_4], y_valid),
                 workers=10,
                 verbose=0)
    logger.info('Done!')

    models = tuner.get_best_models(num_models=8)
    tuner.results_summary()

    logger.info('Saving best models ...')
    for i, model in enumerate(models):
        arch = model.to_json()
        with open(
                os.path.join(cfg['save_dir'], cfg['tune'],
                             f'architecture-{i}.json'), 'w') as arch_file:
            arch_file.write(arch)
        # save_weights takes only the file path; the stray 'w' argument in
        # the original was silently consumed by `overwrite`.
        model.save_weights(
            os.path.join(cfg['save_dir'], cfg['tune'], f'weights-{i}.h5'))
    logger.info('Done!')
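# A hedged sketch of the YAML layout `tune` expects (the top-level key and
# all values are placeholders; the field names are the ones read above):
#
# MyDecayModeAlg:
#   lmdb_dir: ./data/lmdb
#   model: build_decaymode_model
#   arch: default
#   tune_model: build_tunable_decaymode_model
#   save_dir: ./runs
#   tune: tune-v1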
def build_model(image_shape,
                image_type,
                vocabulary_size,
                max_len_word,
                params=None):

    def _builder(hp):
        kernel_size = (3, 3)
        pool_size = (2, 2)
        activation = 'relu'
        # hp.Int replaces the hp.Range API of pre-1.0 keras-tuner.
        n_layers = hp.Int('n_layers',
                          min_value=2,
                          max_value=6,
                          step=1,
                          default=5)
        n_conv = hp.Int('n_conv', min_value=1, max_value=6, step=1, default=2)
        n_base_filters = hp.Choice('n_base_filters',
                                   values=[8, 16, 32],
                                   default=8)
        kernel_initializer = hp.Choice(
            'kernel_initializer',
            values=['glorot_uniform', 'he_normal', 'he_uniform'],
            default='he_uniform')
        use_dense_layer = hp.Choice('use_dense_layer',
                                    values=['yes', 'no'],
                                    default='yes')
        dense_units = hp.Choice('dense_units',
                                values=[512, 1024, 2048],
                                default=2048)
        optimizer_name = hp.Choice('optimizer_name',
                                   values=['sgd', 'adam', 'rmsprop'],
                                   default='rmsprop')
        starting_lr = hp.Choice('starting_lr',
                                values=[1e-1, 1e-2, 1e-3, 1e-4, 1e-5],
                                default=1e-3)
        momentum = hp.Choice('momentum',
                             values=[0.9, 0.95, 0.99],
                             default=0.9)

        tf.keras.backend.clear_session()

        inputs = tf.keras.layers.Input(name='image',
                                       shape=image_shape,
                                       dtype=image_type)
        x = inputs
        for i in range(n_layers):
            # Double the filters per block, capped at 16x the base count.
            n_filters = n_base_filters * 2**min(i, 4)
            for _ in range(n_conv):
                x = tf.keras.layers.Conv2D(
                    n_filters,
                    kernel_size,
                    padding='same',
                    kernel_initializer=kernel_initializer)(x)
                x = tf.keras.layers.BatchNormalization()(x)
                x = tf.keras.layers.Activation(activation)(x)
            x = tf.keras.layers.MaxPooling2D(pool_size)(x)
            x = tf.keras.layers.Dropout(0.2)(x)

        x = tf.keras.layers.Flatten()(x)
        if use_dense_layer == 'yes':
            x = tf.keras.layers.Dense(dense_units)(x)
            x = tf.keras.layers.Activation(activation)(x)
            x = tf.keras.layers.Dropout(0.7)(x)

        # One softmax head per character position.
        outputs = []
        for i in range(max_len_word):
            out = tf.keras.layers.Dense(vocabulary_size,
                                        activation='softmax',
                                        name=f'character_{i}')(x)
            outputs.append(out)
        outputs = tf.keras.layers.Concatenate()(outputs)
        outputs = tf.keras.layers.Reshape(
            (max_len_word, vocabulary_size))(outputs)
        model = tf.keras.Model(inputs=inputs, outputs=outputs)

        # Optimizer parameters.
        sgd_params = {
            'learning_rate': starting_lr,
            'momentum': momentum,
            'nesterov': True
        }
        adam_params = {'learning_rate': starting_lr, 'amsgrad': True}
        rmsprop_params = {'learning_rate': starting_lr}

        # Build the optimizer.
        if optimizer_name == 'sgd':
            optimizer = tf.keras.optimizers.SGD(**sgd_params)
        elif optimizer_name == 'adam':
            optimizer = tf.keras.optimizers.Adam(**adam_params)
        else:
            optimizer = tf.keras.optimizers.RMSprop(**rmsprop_params)

        # Compile the model.
        model.compile(loss='categorical_crossentropy',
                      optimizer=optimizer,
                      metrics=['accuracy'])
        return model

    hp = HyperParameters()
    if params:
        for key, value in params.items():
            hp.Fixed(key, value)
    return _builder(hp)
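# A hedged usage sketch (shapes and sizes are placeholders): build a model
# for 64x256 grayscale word images over a 36-character vocabulary, pinning
# two hyper-parameters via `params` (Fixed values override the search space).
model = build_model(image_shape=(64, 256, 1),
                    image_type=tf.float32,
                    vocabulary_size=36,
                    max_len_word=10,
                    params={'n_layers': 4, 'optimizer_name': 'adam'})
model.summary()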
def init_hyperparameters():
    """
    Initializes the hyperparameters for the model.
    :return: a HyperParameters instance with all values fixed
    """
    hp = HyperParameters()
    hp.Fixed("embedding dim", 9)
    hp.Fixed("conv_1 size", 18)
    hp.Fixed("conv_1 stride", 4)
    hp.Fixed("conv_2 size", 18)
    hp.Fixed("conv_2 stride", 4)
    hp.Fixed("conv_3 size", 18)
    hp.Fixed("conv_3 stride", 4)
    hp.Fixed("conv_4 size", 18)
    hp.Fixed("conv_4 stride", 4)
    hp.Fixed("lstm depth", 2)
    hp.Fixed("lstm breadth", 8)
    hp.Fixed("end depth", 1)
    hp.Fixed("end breadth", 8)
    # Presumably the base-10 exponent of the learning rate (i.e. 1e-3).
    hp.Fixed("learning rate", -3)
    hp.Fixed("dropout", constants.text_rec_dropout)
    return hp
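# A hedged sketch (assumption: "learning rate" stores a base-10 exponent,
# so -3 means 1e-3):
hp = init_hyperparameters()
learning_rate = 10.0 ** hp.get("learning rate")
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)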
def optimize_model(model, kg1, kg2):
    bs = 256
    kg1 = pad(kg1, bs)
    kg2 = pad(kg2, bs)
    kg1 = np.asarray(kg1)
    kg2 = np.asarray(kg2)

    embeddings = {}
    model_name = model

    for kg, name in zip([kg1, kg2], ['_chemical', '_taxonomy']):
        N = len({s for s, _, _ in kg} | {o for _, _, o in kg})
        M = len({p for _, p, _ in kg})

        hp = HyperParameters()
        hp.Fixed('embedding_model', model_name)
        hp.Fixed('num_entities', value=N)
        hp.Fixed('num_relations', value=M)

        lfs = [
            'pairwize_hinge', 'pairwize_logistic', 'pointwize_hinge',
            'pointwize_logistic'
        ]
        hp.Int('margin', 1, 10, default=1)
        hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4, 1e-5])

        # Compare against model_name: `model` is rebound to the built Keras
        # model below, so testing `model` would fail on the second pass.
        if model_name in ['ConvE', 'ConvR', 'ConvKB']:
            batch_size = 128
            hp.Fixed('hidden_dp', 0.2)
        else:
            batch_size = bs
        hp.Choice('regularization', [0.0, 0.01, 0.001], default=0.0)
        if model_name in ['TransE', 'HAKE', 'pRotatE', 'RotatE']:
            hp.Int('gamma', 0, 20, default=0)
        hp.Choice('loss_function', lfs)
        hp.Fixed('dp', 0.2)
        hp.Choice('dim', [100, 200, 400], default=200)
        hp.Choice('negative_samples', [10, 100], default=10)
        hp.Fixed('batch_size', batch_size)

        tuner = BayesianOptimization(
            build_model,
            hyperparameters=hp,
            objective=Objective('relative_loss', 'min'),
            max_trials=MAX_TRIALS,
            overwrite=True,
            project_name='tmp/' + ''.join(
                random.choice(string.ascii_uppercase + string.digits)
                for _ in range(11)))

        tuner.search(kg,
                     epochs=SEARCH_MAX_EPOCHS,
                     batch_size=batch_size,
                     callbacks=[
                         ClearTrainingOutput(),
                         MyCallback(kg),
                         TerminateOnNaN(),
                         TimeStopping(SECONDS_PER_TRAIL),
                         EarlyStopping('loss', min_delta=1e-5, patience=3)
                     ],
                     verbose=1)
        tuner.results_summary()

        best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
        model = tuner.hypermodel.build(best_hps)

        out = dict(best_hps.values)
        with open('./pretrained_hp/%s%s_kg.json' % (model_name, name),
                  'w') as fp:
            json.dump(out, fp)

        model.fit(kg,
                  epochs=MAX_EPOCHS,
                  batch_size=batch_size,
                  callbacks=[
                      EarlyStopping('loss', min_delta=1e-5, patience=3),
                      ReduceLROnPlateau('loss', min_delta=1e-5, patience=3)
                  ])

        embeddings[name] = model.entity_embedding.get_weights()[0]

    return embeddings
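# A hedged usage sketch (kg1/kg2 are lists of integer (subject, predicate,
# object) triples; MAX_TRIALS, SEARCH_MAX_EPOCHS and the other constants are
# assumed to be defined at module level):
embeddings = optimize_model('TransE', kg1, kg2)
chem_vectors = embeddings['_chemical']   # shape: (num_entities, dim)
tax_vectors = embeddings['_taxonomy']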