Code example #1
import tensorflow as tf
from keras_tuner import HyperParameters  # older keras-tuner releases: from kerastuner import HyperParameters


def build_model(hp: HyperParameters):
    inputs = tf.keras.Input((15,))
    x = inputs
    y = inputs
    t_dropout = hp.Float('target_dropout', 0.0, 0.5, 0.1, default=0.2)
    p_dropout = hp.Float('pretrain_dropout', 0.0, 0.5, 0.1, default=0.2)

    for i in range(1):
        # hidden layer
        x = tf.keras.layers.Dense(2**hp.Int('target_exponent_{}'.format(i), 5, 8, default=6),
                                  activation='relu', kernel_initializer='he_uniform',
                                  name='target_dense_{}'.format(i))(x)
        y = tf.keras.layers.Dense(2**hp.Int('pretrain_exponent_{}'.format(i), 5, 8, default=6),
                                  activation='relu', kernel_initializer='he_uniform',
                                  name='pretrain_dense_{}'.format(i))(y)
        # adapter: a small projection of the pretraining stream, concatenated into the target stream below
        a = tf.keras.layers.Dense(2**hp.Int('adapter_exponent_{}'.format(i), 2, 6, default=4),
                                  activation='relu', kernel_initializer='he_uniform',
                                  name='target_adapter_{}'.format(i))(y)

        # dropout layer
        x = tf.keras.layers.Dropout(t_dropout, name='target_dropout_{}'.format(i))(x)
        x = tf.keras.layers.concatenate([x, a], name='target_concat_{}'.format(i))
        y = tf.keras.layers.Dropout(p_dropout, name='pretrain_dropout_{}'.format(i))(y)

    x = tf.keras.layers.Dense(18, activation='softmax', dtype='float32', name='target_output')(x)
    y = tf.keras.layers.Dense(18, activation='softmax', dtype='float32', name='pretrain_output')(y)
    model = tf.keras.Model(inputs=inputs, outputs=[x, y])

    return model
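The function above returns an uncompiled model with two named heads, target_output and pretrain_output. A minimal sketch of how such a two-headed model could be compiled with one loss per head; the optimizer, losses, and the 0.5 weight on the pretraining head are illustrative assumptions, not taken from the original snippet.

def build_compiled_model(hp: HyperParameters):
    # Builds on build_model from the example above; losses map to the output layer names.
    model = build_model(hp)
    model.compile(optimizer='adam',
                  loss={'target_output': 'sparse_categorical_crossentropy',
                        'pretrain_output': 'sparse_categorical_crossentropy'},
                  # hypothetical weighting that favors the target head
                  loss_weights={'target_output': 1.0, 'pretrain_output': 0.5},
                  metrics=['accuracy'])
    return model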
Code example #2
def build_model(hp: HyperParameters):
    inputs = tf.keras.Input((15, ))
    x = inputs
    dropout = hp.Float('dropout', 0.0, 0.5, 0.1, default=0.2)
    for i in range(1):
        x = tf.keras.layers.Dense(
            2**hp.Int('exponent_{}'.format(i), 5, 8, default=6), 'relu')(x)
        x = tf.keras.layers.Dropout(dropout)(x)

    x = tf.keras.layers.Dense(18, activation='softmax', dtype='float32')(x)
    model = tf.keras.Model(inputs=inputs, outputs=x)
    model.compile('adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model
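A sketch of how a build function like this is typically handed to keras-tuner; the tuner class, trial budget, and the x_train / y_train placeholders are assumptions rather than part of the original snippet.

import keras_tuner as kt  # imported as kerastuner in older releases

tuner = kt.RandomSearch(build_model,
                        objective='val_accuracy',
                        max_trials=20,
                        project_name='dense_tuning')
# x_train and y_train stand in for the caller's data.
tuner.search(x_train, y_train, validation_split=0.2, epochs=10)
best_model = tuner.get_best_models(num_models=1)[0]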
Code example #3
def build_optimizer(hp: HyperParameters):
    """ Helper method that defines hyperparameter optimization for optimizer"""
    optimizer = hp.Choice(name="optimizer",
                          values=["adam","sgd","rms"],
                          default="adam")
    learning_rate = hp.Float(name="learning_rate",
                             min_value=1e-4,
                             max_value=5e-3,
                             sampling="log",
                             default=1e-3)
    # probably could use enums here
    if optimizer == "adam":
        return Adam(learning_rate=learning_rate)
    elif optimizer == "sgd":
        return SGD(learning_rate=learning_rate)
    elif optimizer == "rms":
        return RMSprop(learning_rate=learning_rate)
    else:
        raise NotImplementedError()
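For the helper above to take effect, Adam, SGD and RMSprop must be imported and the helper has to receive the same hp object as the model builder, so the optimizer choice and learning rate become part of the search space. A minimal sketch, with the surrounding model invented purely for illustration:

from tensorflow.keras.optimizers import Adam, RMSprop, SGD

def build_model(hp: HyperParameters):
    # Hypothetical minimal classifier wrapped around build_optimizer(hp).
    inputs = tf.keras.Input((15,))
    x = tf.keras.layers.Dense(64, activation='relu')(inputs)
    outputs = tf.keras.layers.Dense(18, activation='softmax')(x)
    model = tf.keras.Model(inputs, outputs)
    model.compile(optimizer=build_optimizer(hp),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model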
Code example #4
File: hcnn.py  Project: marcelroed/bachelor-math
    def build(self, hp: HyperParameters):
        model = keras.models.Sequential([
            keras.layers.Reshape((28, 28, 1, 1)),  # Introduce streams
            keras.layers.Lambda(lambda v: tf.stack((v, tf.zeros_like(v)), axis=-1)),  # Imaginary part initialized to 0
            keras.layers.Lambda(print_return),

            # Block 1: Shape [batch, 28, 28, channels=8, streams=2, 2]
            Conv2DH(out_orders=2, out_channels=8),
            HNonLinearity(),  # Defaults to ReLU
            Conv2DH(out_orders=2, out_channels=8),
            HBatchNormalization(),

            # Block 2: Shape [batch, 14, 14, channels=16, streams=2, 2]
            AvgPool2DH(strides=(2, 2)),
            Conv2DH(out_orders=2, out_channels=16),
            HNonLinearity(),
            Conv2DH(out_orders=2, out_channels=16),
            HBatchNormalization(),

            # Block 3: Shape [batch, 7, 7, channels=35, streams=2, 2]
            AvgPool2DH(),
            Conv2DH(out_orders=2, out_channels=35),
            HNonLinearity(),
            Conv2DH(out_orders=2, out_channels=35),

            # Block 4: Reduce to magnitudes and apply final activation
            HFlatten(),
            keras.layers.Lambda(print_return),
            keras.layers.Dense(10),
            keras.layers.Softmax(),
        ])

        model.compile(
            optimizer=keras.optimizers.Adam(
                learning_rate=10 ** hp.Float('log_learning_rate', -6, -1, step=0.5, default=-3)),
            loss='categorical_crossentropy',
            metrics=['accuracy']
        )

        return model
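The learning rate above is tuned by sampling the exponent and computing 10 ** x. An assumed near-equivalent, ignoring the 0.5 step of the original, is to let hp.Float sample on a log scale directly:

# Sketch: log-uniform learning rate without the manual 10 ** exponent trick.
learning_rate = hp.Float('learning_rate', min_value=1e-6, max_value=1e-1,
                         sampling='log', default=1e-3)
optimizer = keras.optimizers.Adam(learning_rate=learning_rate)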
Code example #5
    def build_model(hp: kt.HyperParameters, use_avs_model: bool = False):
        batch_size = config.generation.batch_size if stateful else None
        layer_names = name_generator('layer')

        inputs = {}
        last_layer = []

        for col in seq.x_cols:
            shape = None, *seq.shapes[col][2:]
            inputs[col] = layers.Input(batch_size=batch_size, shape=shape, name=col)
            last_layer.append(inputs[col])

        random.seed(43)
        for i in range(hp.Int(f'lstm_layers', 2, 7)):
            outs = []
            depth = hp.Int(f'depth_{i}', 4, 64, sampling='log')
            connections = min(hp.Int(f'connections_{i}', 1, 3), len(last_layer))
            dropout = hp.Float(f'dropout_{i}', 0, 0.5)
            for width_i in range(hp.Int(f'width_{i}', 1, 16)):
                t = layers.LSTM(depth, return_sequences=True,
                                name=f'lstm{i:03}_{width_i:03}_{layer_names.__next__()}',
                                stateful=stateful, )(
                    forgiving_concatenate(random.sample(last_layer, connections), name=layer_names.__next__()))
                t = layers.BatchNormalization(name=layer_names.__next__())(t)
                t = layers.Dropout(dropout, name=layer_names.__next__())(t)
                outs.append(t)
            last_layer = outs

        x = forgiving_concatenate(last_layer)
        outputs = {}
        loss = {}
        for col in seq.y_cols:
            if col in seq.categorical_cols:
                shape = seq.shapes[col][-1]
                outputs[col] = layers.TimeDistributed(layers.Dense(shape, activation='softmax'), name=col)(x)
                loss[col] = keras.losses.CategoricalCrossentropy(
                    label_smoothing=tf.cast(hp.Float('label_smoothing', 0.0, 0.7), 'float32'),
                )  # does not work well with mixed precision and stateful model
            if col in seq.regression_cols:
                shape = seq.shapes[col][-1]
                outputs[col] = layers.TimeDistributed(layers.Dense(shape, activation=None), name=col)(x)
                loss[col] = 'mse'

        if stateful or config.training.AVS_proxy_ratio == 0:
            if config.training.AVS_proxy_ratio == 0:
                logging.log(logging.WARNING, f'Not using AVSModel with superior optimizer due to '
                                             f'{config.training.AVS_proxy_ratio=}.')
            model = Model(inputs=inputs, outputs=outputs)
            opt = keras.optimizers.Adam()
        else:
            if use_avs_model:
                model = AVSModel(inputs=inputs, outputs=outputs, config=config)
            else:
                model = Model(inputs=inputs, outputs=outputs)

            lr_schedule = FlatCosAnnealSchedule(decay_start=len(seq) * 30,  # Give extra epochs to big batch_size
                                                initial_learning_rate=hp.Choice('initial_learning_rate',
                                                                                [3e-2, 1e-2, 8e-3, ]),
                                                decay_steps=len(seq) * 40,
                                                alpha=0.01, )
            # Ranger hyper params based on https://github.com/fastai/imagenette/blob/master/2020-01-train.md
            opt = tfa.optimizers.RectifiedAdam(learning_rate=lr_schedule,
                                               beta_1=0.95,
                                               beta_2=0.99,
                                               epsilon=1e-6)
            opt = tfa.optimizers.Lookahead(opt, sync_period=6, slow_step_size=0.5)

        model.compile(
            optimizer=opt,
            loss=loss,
            metrics=metrics.create_metrics((not stateful), config),
        )

        return model
Code example #6
    def build_model(hp: kt.HyperParameters, use_avs_model: bool = True):
        batch_size = config.generation.batch_size if stateful else None
        layer_names = name_generator('layer')

        inputs = {}
        per_stream = {}
        cnn_activation = {'relu': keras.activations.relu,
                          'elu': keras.activations.elu,
                          'mish': tfa.activations.mish}[hp.Choice('cnn_activation', ['relu', 'mish'])]

        cat_cnn_repetition = hp.Int('cat_cnn_repetition', 0, 4)
        cnn_spatial_dropout = hp.Float('spatial_dropout', 0.0, 0.5)
        cat_cnn_filters = hp.Int('cat_cnn_filters', 64, 256, sampling='log')
        reg_cnn_repetition = hp.Int('reg_cnn_repetition', 0, 4)
        reg_cnn_filters = hp.Int('reg_cnn_filters', 64, 256, sampling='log')
        cnn_kernel_size = hp.Choice(f'cnn_kernel_size', ['1', '3', '35', '37', ])

        for col in seq.x_cols:
            if col in seq.categorical_cols:
                shape = None, *seq.shapes[col][2:]
                inputs[col] = layers.Input(batch_size=batch_size, shape=shape, name=col)
                per_stream[col] = inputs[col]
                for _ in range(cat_cnn_repetition):
                    per_stream[col] = forgiving_concatenate(inputs=[
                        layers.Conv1D(filters=cat_cnn_filters,
                                      kernel_size=int(s),
                                      activation=cnn_activation,
                                      padding='causal',
                                      kernel_initializer='lecun_normal',
                                      name=layer_names.__next__())(per_stream[col])
                        for conv_i, s in enumerate(cnn_kernel_size)],
                        axis=-1, name=layer_names.__next__(), )
                    per_stream[col] = layers.BatchNormalization(name=layer_names.__next__(), )(per_stream[col])
                    per_stream[col] = layers.SpatialDropout1D(cnn_spatial_dropout)(per_stream[col])
            if col in seq.regression_cols:
                shape = None, *seq.shapes[col][2:]
                inputs[col] = layers.Input(batch_size=batch_size, shape=shape, name=col)
                per_stream[col] = inputs[col]
                for _ in range(reg_cnn_repetition):
                    per_stream[col] = forgiving_concatenate(inputs=[
                        layers.Conv1D(filters=reg_cnn_filters,
                                      kernel_size=int(s),
                                      activation=cnn_activation,
                                      padding='causal',
                                      kernel_initializer='lecun_normal',
                                      name=layer_names.__next__())(per_stream[col])
                        for conv_i, s in enumerate(cnn_kernel_size)],
                        axis=-1, name=layer_names.__next__(), )
                    per_stream[col] = layers.BatchNormalization(name=layer_names.__next__(), )(per_stream[col])
                    per_stream[col] = layers.SpatialDropout1D(cnn_spatial_dropout)(per_stream[col])

        per_stream_list = list(per_stream.values())
        x = forgiving_concatenate(inputs=per_stream_list, axis=-1, name=layer_names.__next__(), )

        lstm_repetition = hp.Int('lstm_repetition', 0, 4)
        lstm_dropout = hp.Float('lstm_dropout', 0.0, 0.6)
        lstm_l2_regularizer = hp.Choice('lstm_l2_regularizer', [1e-2, 1e-4, 1e-6, 0.0])

        for i in range(lstm_repetition):
            if i > 0:
                x = layers.Dropout(lstm_dropout)(x)
            x = layers.LSTM(hp.Int(f'lstm_{i}_units', 128, 384, sampling='log'), return_sequences=True,
                            stateful=stateful, name=layer_names.__next__(),
                            kernel_regularizer=keras.regularizers.l2(lstm_l2_regularizer), )(x)
            x = layers.BatchNormalization(name=layer_names.__next__(), )(x)

        end_cnn_repetition = hp.Int('end_cnn_repetition', 0, 2)
        end_spatial_dropout = hp.Float('end_spatial_dropout', 0.0, 0.5)
        end_cnn_filters = hp.Int('end_cnn_filters', 128, 384, sampling='log')
        end_cnn_kernel_size = hp.Choice(f'end_cnn_kernel_size', ['1', '3', ])

        for _ in range(end_cnn_repetition):
            x = layers.SpatialDropout1D(end_spatial_dropout)(x)
            x = forgiving_concatenate(inputs=[
                layers.Conv1D(filters=end_cnn_filters,
                              kernel_size=int(s),
                              activation=cnn_activation,
                              padding='causal',
                              kernel_initializer='lecun_normal',
                              name=layer_names.__next__())(x)
                for conv_i, s in enumerate(end_cnn_kernel_size)],
                axis=-1, name=layer_names.__next__(), )
            x = layers.BatchNormalization(name=layer_names.__next__(), )(x)
            x = layers.SpatialDropout1D(end_spatial_dropout)(x)

        outputs = {}
        loss = {}
        for col in seq.y_cols:
            if col in seq.categorical_cols:
                shape = seq.shapes[col][-1]
                outputs[col] = layers.TimeDistributed(layers.Dense(shape, activation='softmax'), name=col)(x)
                loss[col] = keras.losses.CategoricalCrossentropy(
                    label_smoothing=tf.cast(hp.Float('label_smoothing', 0.0, 0.6), 'float32'),
                )  # does not work well with mixed precision and stateful model
            if col in seq.regression_cols:
                shape = seq.shapes[col][-1]
                outputs[col] = layers.TimeDistributed(layers.Dense(shape, activation=None), name=col)(x)
                loss[col] = 'mse'

        if stateful or config.training.AVS_proxy_ratio == 0:
            if config.training.AVS_proxy_ratio == 0:
                logging.log(logging.WARNING, f'Not using AVSModel with superior optimizer due to '
                                             f'{config.training.AVS_proxy_ratio=}.')
            model = Model(inputs=inputs, outputs=outputs)
            opt = keras.optimizers.Adam()
        else:
            model = AVSModel(inputs=inputs, outputs=outputs, config=config)

            decay_start_epoch = hp.Int('decay_start_epoch', 15, 40)
            decay_end_epoch = (decay_start_epoch * 4) // 3
            lr_schedule = FlatCosAnnealSchedule(decay_start=len(seq) * decay_start_epoch,
                                                # Give extra epochs to big batch_size
                                                initial_learning_rate=hp.Choice('initial_learning_rate',
                                                                                [3e-2, 1e-2, 8e-3]),
                                                decay_steps=len(seq) * decay_end_epoch,
                                                alpha=0.001, )
            # Ranger hyper params based on https://github.com/fastai/imagenette/blob/master/2020-01-train.md
            opt = tfa.optimizers.RectifiedAdam(learning_rate=lr_schedule,
                                               beta_1=0.95,
                                               beta_2=0.99,
                                               epsilon=1e-6)
            opt = tfa.optimizers.Lookahead(opt, sync_period=6, slow_step_size=0.5)

        model.compile(
            optimizer=opt,
            loss=loss,
            metrics=metrics.create_metrics((not stateful), config),
        )

        return model
Code example #7
def fit_sim_model(X_train,
                  X_test,
                  y_train,
                  y_test,
                  model1,
                  model2,
                  results_file='results.csv',
                  embedding_file='sim_embeddings',
                  num_runs=1,
                  hp_file1=None,
                  hp_file2=None,
                  hp_pred_file=None,
                  params=None):
    params = params or PARAMS

    kg1 = pd.read_csv('./data/chemicals0.csv')
    kg2 = pd.read_csv('./data/taxonomy0.csv')

    kg1 = list(zip(kg1['subject'], kg1['predicate'], kg1['object']))
    kg2 = list(zip(kg2['subject'], kg2['predicate'], kg2['object']))

    entities1 = set([s for s, p, o in kg1]) | set([o for s, p, o in kg1])
    relations1 = set([p for s, p, o in kg1])
    entities2 = set([s for s, p, o in kg2]) | set([o for s, p, o in kg2])
    relations2 = set([p for s, p, o in kg2])

    me1 = {k: i for i, k in enumerate(entities1)}
    me2 = {k: i for i, k in enumerate(entities2)}
    mr1 = {k: i for i, k in enumerate(relations1)}
    mr2 = {k: i for i, k in enumerate(relations2)}
    kg1 = [(me1[s], mr1[p], me1[o]) for s, p, o in kg1]
    kg2 = [(me2[s], mr2[p], me2[o]) for s, p, o in kg2]

    output_dim = 1

    X_train, y_train = np.asarray([
        (me1[a], me2[b], float(x)) for a, b, x in X_train
        if a in entities1 and b in entities2
    ]), np.asarray([
        float(x) for x, a in zip(y_train, X_train)
        if a[0] in entities1 and a[1] in entities2
    ])

    X_test, y_test = np.asarray([(me1[a], me2[b], float(x))
                                 for a, b, x in X_test
                                 if a in entities1 and b in entities2
                                 ]), np.asarray([
                                     float(x) for x, a in zip(y_test, X_test)
                                     if a[0] in entities1 and a[1] in entities2
                                 ])

    scores = []
    k_best_predictions = []

    hp = HyperParameters()

    kg_lengths = list(map(len, [kg1, kg2]))
    output_lengths = len(X_train)

    hp.Fixed('num_entities1', len(entities1))
    hp.Fixed('num_entities2', len(entities2))
    hp.Fixed('num_relations1', len(relations1))
    hp.Fixed('num_relations2', len(relations2))

    hp.Fixed('embedding_model1', model1)
    hp.Fixed('embedding_model2', model2)
    hp.Fixed('output_dim', output_dim)

    bs = 1024

    if hp_file1 and hp_file2:
        for i, hp_file in enumerate([hp_file1, hp_file2]):
            with open(hp_file, 'r') as fp:
                data = json.load(fp)
                for k in data:
                    hp.Fixed(k + str(i + 1), data[k])
                    if k == 'batch_size':
                        bs = min(bs, data[k])
    else:
        for i, m in zip(['1', '2'], [model1, model2]):
            hp.Choice('dim' + i, [100, 200, 400], default=200)
            hp.Choice('negative_samples' + i, [10, 100], default=10)
            if m in ['ConvE', 'ConvR', 'ConvKB']:
                bs = 128
            hp.Choice('loss_function' + i, [
                'pairwize_hinge', 'pairwize_logistic', 'pointwize_hinge',
                'pointwize_logistic'
            ],
                      default='pairwize_hinge')
            w = kg_lengths[int(i) - 1] / max(kg_lengths)

    if hp_pred_file:
        with open(hp_pred_file, 'r') as fp:
            data = json.load(fp)
            for k in data:
                hp.Fixed(k, data[k])
    else:
        MAX_LAYERS = 3
        hp.Int('branching_num_layers_chemical', 0, MAX_LAYERS, default=1)
        hp.Int('branching_num_layers_species', 0, MAX_LAYERS, default=1)
        hp.Int('branching_num_layers_conc', 0, MAX_LAYERS, default=1)
        hp.Int('num_layers1', 0, 3, default=1)
        for i in range(MAX_LAYERS + 1):
            hp.Choice('branching_units_chemical_' + str(i + 1), [32, 128, 512],
                      default=128)
            hp.Choice('branching_units_species_' + str(i + 1), [32, 128, 512],
                      default=128)
            hp.Choice('branching_units_conc_' + str(i + 1), [32, 128, 512],
                      default=128)
            hp.Choice('units_' + str(i + 1), [32, 128, 512], default=128)

    # Since inputs are oversampled, we must reduce the weight of losses accordingly.
    w = output_lengths / max(kg_lengths)
    hp.Float('loss_weight1', w, 5 * w, sampling='log')
    hp.Float('loss_weight2', w, 5 * w, sampling='log')
    hp.Float('classification_loss_weight', w, 5 * w, sampling='log')
    hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4, 1e-5])
    hp.Fixed('batch_size', bs)

    m = max(map(len, [kg1, kg2, X_train
                      ])) + (bs - max(map(len, [kg1, kg2, X_train])) % bs)
    Xtr, ytr = prep_data_v2(kg1, kg2, X_train, y_train, max_length=m)
    Xte, yte = prep_data_v2(kg1,
                            kg2,
                            X_test,
                            y_test,
                            test=True,
                            max_length=max(bs, len(y_test)))

    tuner = CVTuner(hypermodel=build_model,
                    oracle=kt.oracles.BayesianOptimization(
                        hyperparameters=hp,
                        objective=Objective('val_auc', 'max'),
                        max_trials=params['MAX_TRIALS']),
                    overwrite=True,
                    project_name='tmp/' + ''.join(
                        random.choice(string.ascii_uppercase + string.digits)
                        for _ in range(11)))

    tuner.search(Xtr,
                 ytr,
                 epochs=params['SEARCH_MAX_EPOCHS'],
                 batch_size=bs,
                 callbacks=[
                     EarlyStopping('loss',
                                   mode='min',
                                   patience=params['PATIENCE'])
                 ],
                 kfolds=params['NUM_FOLDS'],
                 class_weight=params['cw'])

    results = []
    prediction = []
    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
    model = tuner.hypermodel.build(best_hps)

    out = dict()
    for k in best_hps.values.keys():
        out[k] = best_hps.values[k]
    with open('./sim_hp/%s.json' % hp_pred_file.split('/')[-1].split('_')[0],
              'w') as fp:
        json.dump(out, fp)

    for _ in range(num_runs):
        reset_weights(model)
        model.fit(Xtr,
                  ytr,
                  epochs=params['MAX_EPOCHS'],
                  batch_size=bs,
                  verbose=2,
                  class_weight=params['cw'],
                  callbacks=[
                      EarlyStopping('loss',
                                    mode='min',
                                    patience=params['PATIENCE'])
                  ])
        r = model.evaluate(Xte, yte, verbose=0, batch_size=bs)
        results.append(r)

    W1 = model.get_layer('embedding').get_weights()[0]
    W2 = model.get_layer('embedding_2').get_weights()[0]
    np.save(embedding_file + '_chemical_embeddings.npy', W1)
    # Materialize the zips with list(); np.asarray on a bare zip object yields a
    # 0-d object array in Python 3, so the ID mappings would not actually be saved.
    np.save(embedding_file + '_chemical_ids.npy',
            np.asarray(list(zip(entities1, range(len(entities1))))))
    np.save(embedding_file + '_taxonomy_embeddings.npy', W2)
    np.save(embedding_file + '_taxonomy_ids.npy',
            np.asarray(list(zip(entities2, range(len(entities2))))))

    var = np.var(np.asarray(results), axis=0)
    results = np.mean(np.asarray(results), axis=0)

    df = pd.DataFrame(
        data={
            'metric': model.metrics_names,
            'value': list(results),
            'variance': list(var)
        })
    df.to_csv(results_file)
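The hp.Fixed, hp.Choice, hp.Int and hp.Float calls above only register names and ranges; the build_model handed to CVTuner (defined elsewhere in the project) reads the chosen values back from the same hp object. A purely illustrative sketch of that read-back side, with a toy architecture standing in for the real embedding model:

def build_model(hp: kt.HyperParameters):
    # Illustrative only: look up registered values by name with hp.get().
    n_entities = hp.get('num_entities1')
    dim = hp.get('dim1')
    inp = tf.keras.Input((1,), dtype='int32')
    x = tf.keras.layers.Flatten()(tf.keras.layers.Embedding(n_entities, dim)(inp))
    out = tf.keras.layers.Dense(hp.get('output_dim'))(x)
    model = tf.keras.Model(inp, out)
    model.compile(tf.keras.optimizers.Adam(hp.get('learning_rate')), loss='mse')
    return model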
Code example #8
File: hyper_models.py  Project: yujun001/mtlearn
def build_hyper_l2_constrained(hp: HyperParameters,
                               n_tasks: int,
                               all_columns: List[str],
                               cat_features_dim: Dict[str, int],
                               restricted_hyperparameter_search: bool,
                               feature_sparsity_min: int = 4,
                               feature_sparsity_max: int = 9,
                               min_layers: int = 3,
                               max_layers: int = 6,
                               min_units_per_layer: int = 32,
                               max_units_per_layer: int = 64,
                               min_l2_alpha: float = 1e-1,
                               max_l2_alpha: float = 1e+2
                               ) -> Model:
    """
    Build model for L2 constrained multi-task learning model

    Parameters
    ----------
    hp: instance of HyperParameters
        Hyper-Parameters that define architecture and training of neural networks

    n_tasks: int
        Number of tasks

    all_columns: list
        Names of the features

    cat_features_dim: dict
        Dictionary that maps from the name of categorical feature
        to its dimensionality.

    restricted_hyperparameter_search: bool
        If True, then fixes following hyperparameters and does not optimize them.
        - batch_size = 1024
        - hidden_layer_activation = relu
        - optimizer = sgd

    feature_sparsity_min: int
        Minimum possible value of feature sparsity threshold

    feature_sparsity_max: int
        Maximum possible value of feature sparsity threshold

    min_layers: int
        Minimum number of layers

    max_layers: int
        Maximum number of layers

    min_units_per_layer: int
        Minimum number of neurons per layer

    max_units_per_layer: int
        Maximum number of neurons per layer

    min_l2_alpha: float
        Minimum possible value of l2 regularization coefficient

    max_l2_alpha: float
        Maximum possible value of l2 regularization coefficient

    Returns
    -------
    model: tensorflow.keras.models.Model
        Compiled L2 Constrained Model
    """
    # define activation functions and preprocessing layer
    build_activation_functions(hp, restricted_hyperparameter_search)
    preprocessing_layer = build_preprocessing_layer_uci_income(hp,
                                                               all_columns,
                                                               cat_features_dim,
                                                               feature_sparsity_min,
                                                               feature_sparsity_max)
    # propagate input through preprocessing layer
    input_layer = Input(shape=(len(all_columns),))
    x = preprocessing_layer(input_layer)

    # build l2 constrained model
    n_layers = hp.Int("number_of_hidden_layers",
                      min_value=min_layers,
                      max_value=max_layers)
    for i in range(n_layers):
        n_units = hp.Int("n_units_layer_{0}".format(i),
                         min_value=min_units_per_layer,
                         max_value=max_units_per_layer)
        mtl_layers = [Dense(n_units, hp['hidden_layer_activation']) for _ in range(n_tasks)]
        l2_regularizer = hp.Float("l2_regularizer_layer_{0}".format(i),
                                  min_value=min_l2_alpha,
                                  max_value=max_l2_alpha)
        constrained_l2 = ConstrainedMTL(mtl_layers, l1_regularizer=0., l2_regualrizer=l2_regularizer)
        x = constrained_l2(x)
    output_layers = [Dense(1, hp['output_layer_activation'])(x[i]) for i in range(n_tasks)]
    model = Model(inputs=input_layer, outputs=output_layers)
    return model
Code example #9
File: keras.py  Project: cdcai/premier_analysis
    def build(self, hp: kerastuner.HyperParameters) -> keras.Model:
        """Build DAN model

        Notes:
            This is normally called within a HyperModel context.
        Args:
            hp (:obj:`HyperParameters`): `HyperParameters` instance

        Returns:
            A built/compiled keras model ready for hyperparameter tuning
        """

        # L1/L2 vals
        reg_vals = [0.0, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1]

        # --- Model Topology

        # Feature Embedding Params
        emb_l1 = hp.Choice("Feature Embedding L1", reg_vals, default=0.0)
        emb_l2 = hp.Choice("Feature Embedding L2", reg_vals, default=0.0)

        emb_n = hp.Int("Embedding Dimension",
                       min_value=64,
                       max_value=2048,
                       default=1024,
                       step=64)

        emb_dropout = hp.Float("Dropout from Embeddings",
                               min_value=0.0,
                               max_value=0.9,
                               step=0.05,
                               default=0.0)

        final_dropout = hp.Float("Dropout before prediction",
                                 min_value=0.0,
                                 max_value=0.9,
                                 step=0.05,
                                 default=0.5)

        # Final dense layer
        dense_size = hp.Int("Dense Units",
                            min_value=2,
                            max_value=128,
                            sampling="log",
                            default=14)

        # --- Model
        feat_input = keras.Input(shape=(self.input_size, ))

        # Feature Embeddings
        embeddings = keras.layers.Embedding(
            input_dim=self.vocab_size,
            output_dim=emb_n,
            embeddings_regularizer=keras.regularizers.l1_l2(emb_l1, emb_l2),
            mask_zero=True,
            name="Feature_Embeddings")(feat_input)

        dropout_1 = keras.layers.Dropout(rate=emb_dropout)(embeddings)

        # Averaging the embeddings
        embedding_avg = keras.backend.mean(dropout_1, 1)

        # Dense layers
        dense = keras.layers.Dense(dense_size,
                                   activation="relu",
                                   name='dense_1')(embedding_avg)

        dropout_2 = keras.layers.Dropout(final_dropout)(dense)

        activation_fn = "softmax" if self.n_classes > 2 else "sigmoid"

        output = keras.layers.Dense(
            units=self.n_classes if self.n_classes > 2 else 1,
            activation=activation_fn,
            name="Output")(dropout_2)

        model = keras.Model(feat_input, output)

        # --- Learning rate and momentum
        # lr = hp.Choice(
        #     "Learning Rate",
        #     [1e-6, 5e-6, 1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1])
        # momentum = hp.Float("Momentum", min_value=0.0, max_value=0.9, step=0.1)
        # opt = keras.optimizers.SGD(lr, momentum=momentum)
        # NOTE: I've had a lot of issues with SGD getting even comparable performance to Adam
        # so I'm saying we scrap it and just go with Adam.
        opt = keras.optimizers.Adam()

        # --- Loss FN
        # NOTE: I was messing around with focal loss here, but I think that's
        # harder to justify and explain in this context
        if self.loss is None:
            if self.n_classes > 2:
                loss_fn = keras.losses.categorical_crossentropy
            else:
                loss_fn = keras.losses.binary_crossentropy
        else:
            loss_fn = self.loss
        model.compile(optimizer=opt, loss=loss_fn, metrics=self.metrics)

        return model
Code example #10
File: keras.py  Project: cdcai/premier_analysis
    def build(self, hp: kerastuner.HyperParameters) -> keras.Model:
        """Build LSTM model

        Notes:
            This is normally called within a HyperModel context.
        Args:
            hp (:obj:`HyperParameters`): `HyperParameters` instance

        Returns:
            A built/compiled keras model ready for hyperparameter tuning
        """

        # L1/L2 vals
        reg_vals = [0.0, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1]

        # Model Topology

        # Should we multiply the feature embeddings by their averages?
        weighting = hp.Boolean("Feature Weighting")

        # Should we add a dense layer between RNN and output?
        final_dense = hp.Boolean("Final Dense Layer")

        # Feature Embedding Params
        emb_l1 = hp.Choice("Feature Embedding L1", reg_vals)
        emb_l2 = hp.Choice("Feature Embedding L2", reg_vals)

        emb_n = hp.Int("Embedding Dimension",
                       min_value=64,
                       max_value=512,
                       default=64,
                       step=64)

        # Demog Embedding
        demog_emb_n = hp.Int("Demographics Embedding Dimension",
                             min_value=1,
                             max_value=64,
                             default=self.n_demog)

        # Average Embedding Params
        avg_l1 = hp.Choice("Average Embedding L1",
                           reg_vals,
                           parent_name="Feature Weighting",
                           parent_values=[True])
        avg_l2 = hp.Choice("Average Embedding L2",
                           reg_vals,
                           parent_name="Feature Weighting",
                           parent_values=[True])

        # LSTM Params
        lstm_n = hp.Int("LSTM Units",
                        min_value=32,
                        max_value=512,
                        default=32,
                        step=32)
        lstm_dropout = hp.Float("LSTM Dropout",
                                min_value=0.0,
                                max_value=0.9,
                                default=0.4,
                                step=0.01)
        lstm_recurrent_dropout = hp.Float("LSTM Recurrent Dropout",
                                          min_value=0.0,
                                          max_value=0.9,
                                          default=0.4,
                                          step=0.01)
        lstm_l1 = hp.Choice("LSTM weights L1", reg_vals)
        lstm_l2 = hp.Choice("LSTM weights L2", reg_vals)

        # Final dense layer
        dense_n = hp.Int("Dense Units",
                         min_value=2,
                         max_value=128,
                         sampling="log",
                         parent_name="Final Dense Layer",
                         parent_values=[True])
        # Model code
        feat_input = keras.Input(shape=(None, None), ragged=True)
        demog_input = keras.Input(shape=(self.n_demog_bags, ))

        demog_emb = keras.layers.Embedding(
            self.n_demog,
            output_dim=demog_emb_n,
            mask_zero=True,
            name="Demographic_Embeddings")(demog_input)

        demog_avg = keras.layers.Flatten()(demog_emb)

        emb1 = keras.layers.Embedding(
            self.vocab_size,
            output_dim=emb_n,
            embeddings_regularizer=keras.regularizers.l1_l2(emb_l1, emb_l2),
            mask_zero=True,
            name="Feature_Embeddings")(feat_input)

        if weighting:
            emb2 = keras.layers.Embedding(
                self.vocab_size,
                output_dim=1,
                embeddings_regularizer=keras.regularizers.l1_l2(
                    avg_l1, avg_l2),
                mask_zero=True,
                name="Average_Embeddings")(feat_input)

            # Multiplying the code embeddings by their respective weights
            mult = keras.layers.Multiply(name="Embeddings_by_Average")(
                [emb1, emb2])
            avg = keras.layers.Lambda(lambda x: tf.math.reduce_mean(x, axis=2),
                                      name="Averaging")(mult)
        else:
            avg = keras.layers.Lambda(lambda x: tf.math.reduce_mean(x, axis=2),
                                      name="Averaging")(emb1)

        lstm_layer = keras.layers.LSTM(
            lstm_n,
            dropout=lstm_dropout,
            recurrent_dropout=lstm_recurrent_dropout,
            recurrent_regularizer=keras.regularizers.l1_l2(lstm_l1, lstm_l2),
            name="Recurrent")(avg)

        lstm_layer = keras.layers.Concatenate()([lstm_layer, demog_avg])

        if final_dense:
            lstm_layer = keras.layers.Dense(dense_n,
                                            activation="relu",
                                            name="pre_output")(lstm_layer)

        activation_fn = "softmax" if self.n_classes > 2 else "sigmoid"
        output = keras.layers.Dense(
            self.n_classes if self.n_classes > 2 else 1,
            activation=activation_fn,
            name="Output")(lstm_layer)

        model = keras.Model([feat_input, demog_input], output)

        # --- Learning rate and momentum
        # lr = hp.Choice(
        #     "Learning Rate",
        #     [1e-6, 5e-6, 1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1])
        # momentum = hp.Float("Momentum", min_value=0.0, max_value=0.9, step=0.1)
        # opt = keras.optimizers.SGD(lr, momentum=momentum)
        opt = keras.optimizers.Adam()

        # --- Loss FN
        # NOTE: I was messing around with focal loss here, but I think that's
        # harder to justify and explain in this context
        if self.loss is None:
            if self.n_classes > 2:
                loss_fn = keras.losses.categorical_crossentropy
            else:
                loss_fn = keras.losses.binary_crossentropy
        else:
            loss_fn = self.loss

        model.compile(optimizer=opt, loss=loss_fn, metrics=self.metrics)

        return model
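The parent_name / parent_values keywords used above make a hyperparameter conditional: "Dense Units" is only sampled in trials where "Final Dense Layer" is True. Newer keras-tuner releases usually express the same relationship with a conditional scope; a short sketch under that assumption:

add_dense = hp.Boolean("Final Dense Layer")
with hp.conditional_scope("Final Dense Layer", [True]):
    dense_n = hp.Int("Dense Units", min_value=2, max_value=128, sampling="log")
if add_dense:
    lstm_layer = keras.layers.Dense(dense_n, activation="relu")(lstm_layer)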