Code example #1
File: layers.py Project: alope107/MaLICE
 def build(self, input_shape):
     self.N_offset = self.add_weight(shape=(self.n_res, ),
                                     initializer=Constant(0.005),
                                     constraint=MinMaxNorm(
                                         min_value=self.N_offset_lower,
                                         max_value=self.N_offset_upper,
                                         rate=1.0),
                                     trainable=True)
     self.H_offset = self.add_weight(shape=(self.n_res, ),
                                     initializer=Constant(0.001),
                                     constraint=MinMaxNorm(
                                         min_value=self.H_offset_lower,
                                         max_value=self.H_offset_upper,
                                         rate=1.0),
                                     trainable=True)
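All of the snippets on this page use keras.constraints.MinMaxNorm, which rescales a weight tensor so that its norm along the chosen axis stays within [min_value, max_value]; rate controls how strongly the norm is pulled into that interval on each application. A minimal standalone sketch (TensorFlow 2.x Keras assumed; shapes and bounds are illustrative):

import tensorflow as tf
from tensorflow.keras.constraints import MinMaxNorm

# Constrain each column (axis=0) of a kernel to an L2 norm in [0.5, 2.0].
constraint = MinMaxNorm(min_value=0.5, max_value=2.0, rate=1.0, axis=0)

w = tf.random.normal((8, 4)) * 10.0       # deliberately over-sized weights
w_constrained = constraint(w)             # a constraint is just a callable on the weight tensor

tf.print(tf.norm(w, axis=0))              # column norms well above 2.0
tf.print(tf.norm(w_constrained, axis=0))  # every column norm pulled down to 2.0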
Code example #2
 def build(self, input_shape):
     # Create a trainable weight variable for this layer.
     channels = input_shape[3]
     channels2 = K.int_shape(self.input_h)[3]
     self.channels = channels
     self.w = input_shape[1]
     self.f = self.add_weight(name='f',
                              shape=(1, 1, channels2, channels // self.k),
                              initializer='uniform',
                              trainable=True)
     self.g = self.add_weight(name='g',
                              shape=(1, 1, channels, channels // self.k),
                              initializer='uniform',
                              trainable=True)
     if self.mix_concat == 'mix':
         self.gamma = self.add_weight(name='gamma',
                                      shape=(1, ),
                                      initializer='uniform',
                                      trainable=True)
     elif self.mix_concat == 'weighted_mix':
         self.gamma = self.add_weight(name='gamma',
                                      shape=(1, ),
                                      initializer='uniform',
                                      trainable=True,
                                      constraint=MinMaxNorm(min_value=0.0,
                                                            max_value=1.0))
     super(AttentionLayer,
           self).build(input_shape)  # Be sure to call this at the end
Code example #3
def simple_dense():
    """Creates a simple sequential model, with 5 dense layers"""
    model = Sequential()
    model.add(
        Dense(units=32,
              input_shape=(32, ),
              use_bias=True,
              bias_constraint=MinMaxNorm(min_value=-1,
                                         max_value=1,
                                         rate=1.0,
                                         axis=0),
              bias_initializer=glorot_normal(seed=32),
              kernel_constraint=MaxNorm(max_value=1.5),
              kernel_initializer=glorot_uniform(seed=45)))
    model.add(Activation('relu'))
    model.add(
        Dense(units=32,
              activation='tanh',
              use_bias=False,
              activity_regularizer=l1_l2(l1=0.05, l2=0.05),
              kernel_constraint=MaxNorm(max_value=1.5),
              kernel_initializer=glorot_uniform(seed=45)))

    model.add(
        Dense(units=10,
              activation='softmax',
              use_bias=False,
              activity_regularizer=l1_l2(l1=0.05, l2=0.05),
              kernel_constraint=MaxNorm(max_value=1.5),
              kernel_initializer=glorot_uniform(seed=45)))
    return model
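A hedged usage note for simple_dense (the optimizer and loss below are illustrative): the constraints are attached to the layers and applied to the weights after every optimizer step, so they can be inspected on the built model:

model = simple_dense()
model.compile(optimizer='adam', loss='categorical_crossentropy')

first_dense = model.layers[0]
print(first_dense.kernel_constraint)  # the MaxNorm(max_value=1.5) instance
print(first_dense.bias_constraint)    # the MinMaxNorm(min_value=-1, max_value=1, rate=1.0, axis=0) instance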
Code example #4
File: GAN_trial.py Project: ljaeg/aerogel_repo
def make_discriminator():
    mmn = MinMaxNorm(min_value=-.01, max_value=.01)
    model = Sequential()
    model.add(
        Conv2D(conv_scale, kernel_size, padding="same", kernel_constraint=mmn))
    model.add(LeakyReLU(alpha=.2))
    model.add(
        Conv2D(2 * conv_scale,
               kernel_size,
               padding="same",
               kernel_constraint=mmn))
    model.add(LeakyReLU(alpha=.2))
    model.add(
        Conv2D(2 * conv_scale,
               kernel_size,
               padding="same",
               kernel_constraint=mmn))
    model.add(BatchNormalization(momentum=.95))
    model.add(LeakyReLU(alpha=.2))
    # model.add(Conv2D(2*conv_scale, kernel_size, padding = "same"))
    # model.add(BatchNormalization(momentum = .8))
    # model.add(LeakyReLU(alpha = .2))
    model.add(Flatten())
    model.add(Dense(1, activation="linear"))
    model.compile(optimizer=RMSprop(lr=.00005),
                  loss=binary_crossentropy,
                  metrics=["accuracy"])
    return model
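A hedged note on the constraint above: MinMaxNorm(min_value=-.01, max_value=.01) caps the weight norm along axis 0 at 0.01 (the negative lower bound is effectively ignored, since norms are non-negative), which resembles the hard weight clipping used in Wasserstein-GAN critics. A small standalone check (TensorFlow 2.x Keras assumed; the kernel shape is illustrative):

import tensorflow as tf
from tensorflow.keras.constraints import MinMaxNorm

mmn = MinMaxNorm(min_value=-.01, max_value=.01)  # same bounds as in make_discriminator above
w = tf.random.normal((3, 3, 8, 16))              # a Conv2D-kernel-shaped tensor
w_capped = mmn(w)
tf.print(tf.reduce_max(tf.abs(w_capped)))        # every entry ends up <= 0.01 once the axis-0 norms are capped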
Code example #5
    def create_trainable_wasserstein(self,
                                     nb_event,
                                     nb_type,
                                     nb_feature,
                                     wgan_clip=1.):
        from keras.layers import Input, Dense, Flatten, Convolution2D, Activation, Dropout, merge
        from keras.models import Model
        from keras.constraints import MinMaxNorm

        constraint = MinMaxNorm(min_value=-wgan_clip, max_value=wgan_clip)

        x = Input(batch_shape=(1, nb_event, nb_type, nb_feature),
                  dtype='float')
        y = Convolution2D(128,
                          kernel_size=[nb_event - 10 + 1, 1],
                          strides=(2, 1),
                          activation='relu',
                          kernel_constraint=constraint,
                          bias_constraint=constraint)(x)
        y = Dropout(0.5)(y)
        y = Convolution2D(128,
                          kernel_size=[3, nb_type],
                          activation='relu',
                          kernel_constraint=constraint,
                          bias_constraint=constraint)(y)
        y = Dropout(0.5)(y)
        y = Flatten()(y)
        y = Dense(2,
                  activation=None,
                  kernel_constraint=constraint,
                  bias_constraint=constraint)(y)

        model = Model(inputs=[x], outputs=[y], name='dis_output')
        self.model = model
        return model
Code example #6
    def create_model(self, clear_session=False):
        if clear_session: K.clear_session()
        set_random_seed(666)
        seed(self.seed)
        model = Sequential()
        activation = self.model_kwargs["activation"]
        max_norm = self.model_kwargs["max_norm"]
        n_layers = self.model_kwargs["n_layers"]
        d = self.model_kwargs["dropouts"]
        units = self.model_kwargs["units"]
        rk = self.model_kwargs["rk"]
        rb = self.model_kwargs["rb"]
        lr = self.model_kwargs["lr"]

        model.add(
            Dense(input_shape=(self.input_dim, ),
                  units=units[0],
                  kernel_initializer="glorot_normal",
                  kernel_regularizer=l2(rk[0]),
                  kernel_constraint=MinMaxNorm(0, max_norm),
                  bias_regularizer=l2(rb[0]),
                  activation=activation))
        model.add(Dropout(d[0]))

        for i in range(n_layers - 1):
            model.add(
                Dense(units=units[i + 1],
                      kernel_initializer="glorot_normal",
                      kernel_regularizer=l2(rk[i + 1]),
                      bias_regularizer=l2(rb[i + 1]),
                      kernel_constraint=MinMaxNorm(0, max_norm),
                      activation=activation))
            model.add(Dropout(d[i + 1]))

        model.add(
            Dense(units=self.output_dim,
                  kernel_initializer="glorot_normal",
                  kernel_regularizer=l2(rk[-1]),
                  bias_regularizer=l2(rb[-1]),
                  kernel_constraint=MinMaxNorm(0, max_norm),
                  activation="softmax"))
        optimizer = Adam(lr=lr, clipnorm=.2)
        model.compile(loss="categorical_crossentropy",
                      optimizer=optimizer,
                      metrics=["acc"])
        self.model = model
Code example #7
File: layers.py Project: alope107/MaLICE
 def build(self, input_shape):
     self.cs_noise = self.add_weight(
         shape=(1, ),
         initializer=Constant(
             self.larmor / 250
         ),  #RandomUniform(minval=self.larmor/5000, maxval=self.larmor/500), 
         constraint=MinMaxNorm(min_value=self.cs_noise_lower,
                               max_value=self.cs_noise_upper,
                               rate=1.0),
         trainable=True)
Code example #8
File: layers.py Project: alope107/MaLICE
 def build(self, input_shape):
     init_I_mean = float(tf.math.reduce_mean(self.init_I))
     self.I_noise = self.add_weight(
         shape=(1, ),
         initializer=Constant(
             init_I_mean / 20.0
         ),  #RandomUniform(minval=init_I_mean/50, maxval=init_I_mean/4),  
         constraint=MinMaxNorm(min_value=self.I_noise_lower,
                               max_value=self.I_noise_upper,
                               rate=1.0),
         trainable=True)
Code example #9
 def build(self, input_shape):
     assert isinstance(input_shape, list)
     # Create a trainable weight variable for this layer.
     self.k = self.add_weight(name='k',
                              shape=[1],
                              initializer=self.initializer,
                              trainable=True,
                              constraint=MinMaxNorm(min_value=-2.0,
                                                    max_value=2.0,
                                                    rate=0.8))
     # Be sure to call this at the end
     super(VariableScaling, self).build(input_shape)
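The rate=0.8 above enforces the bound only partially: each time the constraint runs, the weight norm is moved 80% of the way toward the clipped value instead of all the way. A hedged numeric sketch of that interpolation (TensorFlow 2.x Keras assumed; the value 3.0 is illustrative):

import tensorflow as tf
from tensorflow.keras.constraints import MinMaxNorm

w = tf.constant([[3.0]])  # norm 3.0, outside [-2.0, 2.0], so the clipped target norm is 2.0

partial = MinMaxNorm(min_value=-2.0, max_value=2.0, rate=0.8)(w)
full = MinMaxNorm(min_value=-2.0, max_value=2.0, rate=1.0)(w)

tf.print(partial)  # ~2.2, i.e. 0.8 * 2.0 + 0.2 * 3.0: 80% of the way to the bound
tf.print(full)     # 2.0: fully clipped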
Code example #10
File: layers.py Project: alope107/MaLICE
 def build(self, input_shape):
     self.Kd_exp = self.add_weight(
         name='Kd_exp',
         shape=(1, ),
         #initializer=Constant(K.log(Kd)/K.log(10.0)),
         initializer=RandomUniform(minval=self.Kd_exp_lower,
                                   maxval=self.Kd_exp_upper),
         constraint=MinMaxNorm(min_value=self.Kd_exp_lower,
                               max_value=self.Kd_exp_upper,
                               rate=1.0),
         trainable=True)
     self.koff_exp = self.add_weight(
         name='koff_exp',
         shape=(1, ),
         #initializer=Constant(K.log(koff)/K.log(10.0))
         initializer=RandomUniform(minval=self.koff_exp_lower,
                                   maxval=self.koff_exp_upper),
         constraint=MinMaxNorm(min_value=self.koff_exp_lower,
                               max_value=self.koff_exp_upper,
                               rate=1.0),
         trainable=True)
Code example #11
File: layers.py Project: alope107/MaLICE
    def build(self, input_shape):
        init_I_mean = tf.math.reduce_mean(self.init_I)
        init_I_std = tf.math.reduce_std(self.init_I)
        self.I_offset = self.add_weight(name='ref_I',
                                        shape=(self.n_res, ),
                                        initializer=Constant(0.001),
                                        constraint=MinMaxNorm(
                                            min_value=self.I_offset_lower,
                                            max_value=self.I_offset_upper,
                                            rate=1.0),
                                        trainable=True)

        self.dR2 = self.add_weight(
            shape=(1, ),
            #initializer=Constant(dR2),
            initializer=RandomUniform(minval=self.dR2_lower,
                                      maxval=self.dR2_upper),
            constraint=MinMaxNorm(min_value=self.dR2_lower,
                                  max_value=self.dR2_upper,
                                  rate=1.0),
            trainable=True)
        self.amp_scaler = self.add_weight(
            name='amp_scaler',
            shape=(1, ),
            #initializer=Constant(amp_scaler),#
            initializer=RandomNormal(mean=float(5 * init_I_mean),
                                     stddev=float(5 * init_I_std)),
            constraint=MinMaxNorm(min_value=self.amp_scaler_lower,
                                  max_value=self.amp_scaler_upper),
            trainable=True)

        self.delta_w = self.add_weight(name='delta_w',
                                       shape=(self.n_res, ),
                                       initializer=Constant(self.larmor / 100),
                                       constraint=MinMaxNorm(
                                           min_value=self.delta_w_lower,
                                           max_value=self.delta_w_upper,
                                           rate=1.0),
                                       regularizer=L2(1e-2),
                                       trainable=True)
Code example #12
 def build(self, input_shape):
     # initialize weight matrix for each capsule in lower layer
     self.rho = self.add_weight(shape=[input_shape[-1]],
                                initializer=Constant(1.0),
                                name='rho',
                                constraint=MinMaxNorm())
     self.gamma = self.add_weight(shape=[input_shape[-1]],
                                  initializer=Constant(1.0),
                                  name='gamma')
     self.beta = self.add_weight(shape=[input_shape[-1]],
                                 initializer=Constant(0.0),
                                 name='beta')
     self.built = True
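MinMaxNorm() with no arguments falls back to its defaults, so rho above is kept with an L2 norm in [0, 1]. A quick check of those defaults (TensorFlow 2.x Keras assumed):

from tensorflow.keras.constraints import MinMaxNorm

print(MinMaxNorm().get_config())
# {'min_value': 0.0, 'max_value': 1.0, 'rate': 1.0, 'axis': 0}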
Code example #13
    def load_model(self, num_layers=10):
        self.add(
            Dense(units=32,
                  input_shape=(32, ),
                  use_bias=True,
                  bias_constraint=MinMaxNorm(min_value=-1,
                                             max_value=1,
                                             rate=1.0,
                                             axis=0),
                  bias_initializer=glorot_normal(seed=32),
                  kernel_constraint=MaxNorm(max_value=1.5),
                  kernel_initializer=glorot_uniform(seed=45)))
        self.add(
            Dense(units=32,
                  use_bias=True,
                  activation='tanh',
                  bias_constraint=MinMaxNorm(min_value=-1,
                                             max_value=1,
                                             rate=1.0,
                                             axis=0),
                  bias_initializer=glorot_normal(seed=32),
                  kernel_constraint=MaxNorm(max_value=1.5),
                  kernel_initializer=glorot_uniform(seed=45)))
        self.add(Dropout(rate=0.5))

        self.add(
            Dense(units=10,
                  use_bias=True,
                  activation='softmax',
                  bias_constraint=MinMaxNorm(min_value=-1,
                                             max_value=1,
                                             rate=1.0,
                                             axis=0),
                  bias_initializer=glorot_normal(seed=32),
                  kernel_constraint=MaxNorm(max_value=1.5),
                  kernel_initializer=glorot_uniform(seed=45)))
Code example #14
    def build(self, input_shape):
        # initialize weight matrix for each capsule in lower layer
        self.W = self.add_weight(shape=[input_shape[-1]],
                                 initializer=Ones(),
                                 name='weights',
                                 constraint=MinMaxNorm())
        self.latent_size = input_shape[-1]

        # TODO: (local)Conv2D with high stride before dense? This is way too inefficient, no wonder UGATIT is 2G
        input_prod = np.prod(input_shape[1:])
        self.fc_gamma = Dense(input_shape[-1])
        self.fc_gamma.build((None, input_prod))
        self.fc_beta = Dense(input_shape[-1])
        self.fc_beta.build((None, input_prod))
        self.flatten = Flatten()
        self.flatten.build(input_shape)
        self.trainable_weights.extend(self.fc_beta.trainable_weights)
        self.trainable_weights.extend(self.fc_gamma.trainable_weights)

        self.built = True
Code example #15
File: lstmModel.py Project: recombee/lstm-models
def return_norm(name, lstm_config, minimum, maximum, logger):
    """
    Return a weight-norm constraint object for the neural network, selected by name from lstm_config.
    """

    log_name = name
    name = lstm_config[name.lower()]
    if name == 'maxnorm':
        logger.info("In {} use {} constraint with max={}".format(
            log_name, name, maximum))
        return MaxNorm(maximum)
    if name == 'nonnegnorm':
        logger.info("In {} use {} constraint ".format(log_name, name))
        return NonNeg()
    if name == 'minmaxnorm':
        logger.info("In {} use {} constraint with min={} and max={}".format(
            log_name, name, minimum, maximum))
        return MinMaxNorm(minimum, maximum)
    else:
        logger.info("None constraint in {}.".format(log_name))
        return None
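A hedged call sketch for return_norm (the lstm_config dictionary, its keys, and the logging setup are assumptions made for illustration; the function looks the constraint up by the lower-cased name):

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('lstm')

# Hypothetical config mapping lower-cased slot names to constraint names.
lstm_config = {'kernel_constraint': 'minmaxnorm', 'recurrent_constraint': 'maxnorm'}

kernel_norm = return_norm('Kernel_Constraint', lstm_config, -1.0, 1.0, logger)        # -> MinMaxNorm(-1.0, 1.0)
recurrent_norm = return_norm('Recurrent_Constraint', lstm_config, -1.0, 1.0, logger)  # -> MaxNorm(1.0)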
Code example #16
def demand_lstm(step_back, ts_shape, y_shape):
    '''
    Architecture for LSTM model
    :param step_back: number of steps back in time for demand
    :param ts_shape: shape of the time-space vector
    :param y_shape: shape of target vector
    :return: model
    '''
    demand_input = Input(shape=(step_back, 1))
    lstm_layer = LSTM(units=100, activation='tanh',
                      return_sequences=True)(demand_input)
    dropout = Dropout(0.5)(lstm_layer)
    lstm_layer1 = LSTM(units=50, activation='tanh',
                       return_sequences=True)(dropout)
    dropout_1 = Dropout(0.5)(lstm_layer1)
    lstm_layer2 = LSTM(units=25, activation='tanh',
                       return_sequences=True)(dropout_1)
    dropout_2 = Dropout(0.2)(lstm_layer2)
    lstm_layer3 = LSTM(units=10, activation='tanh',
                       return_sequences=True)(dropout_2)
    flatten_lstm3 = Flatten()(lstm_layer3)

    time_space_input = Input(shape=(ts_shape, ))
    dense_ts = Dense(64)(time_space_input)

    merge_ts_lstm = concatenate([flatten_lstm3, dense_ts])
    dense_1 = Dense(75)(merge_ts_lstm)
    dense_2 = Dense(25)(dense_1)
    output_dense = Dense(y_shape,
                         kernel_constraint=MinMaxNorm(min_value=0.0,
                                                      max_value=1.0))(dense_2)

    model = Model(inputs=[demand_input, time_space_input],
                  outputs=output_dense)
    model.compile(optimizer='adam', loss=mean_squared_error, metrics=[rmse])
    print(model.summary())
    return model
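A hedged call sketch for demand_lstm (the dimensions are illustrative; rmse and mean_squared_error are assumed to be defined elsewhere in the original module):

# 24 past demand steps, 10 time-space features, 5 prediction targets (illustrative values).
model = demand_lstm(step_back=24, ts_shape=10, y_shape=5)

# The model expects two inputs per sample: a demand history of shape (24, 1)
# and a time-space vector of shape (10,), e.g.
# model.fit([demand_history, time_space], targets, epochs=..., batch_size=...)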
Code example #17
    def compile_elmo(self, print_summary=False):

        if self.parameters['token_encoding'] == 'word':
            word_inputs = Input(shape=(None, ),
                                name='word_indices',
                                dtype='int32')
            embeddings = Embedding(self.parameters['vocab_size'],
                                   self.parameters['hidden_units_size'],
                                   trainable=True,
                                   name='token_encoding')
            inputs = embeddings(word_inputs)
            drop_inputs = SpatialDropout1D(
                self.parameters['dropout_rate'])(inputs)
            lstm_inputs = TimestepDropout(
                self.parameters['word_dropout_rate'])(drop_inputs)
            next_ids = Input(shape=(None, 1), name='next_ids', dtype='float32')
            previous_ids = Input(shape=(None, 1),
                                 name='previous_ids',
                                 dtype='float32')
        elif self.parameters['token_encoding'] == 'char':
            word_inputs = Input(shape=(
                None,
                self.parameters['token_maxlen'],
            ),
                                dtype='int32',
                                name='char_indices')
            inputs = self.char_level_token_encoder()(word_inputs)
            drop_inputs = SpatialDropout1D(
                self.parameters['dropout_rate'])(inputs)
            lstm_inputs = TimestepDropout(
                self.parameters['word_dropout_rate'])(drop_inputs)
            next_ids = Input(shape=(None, 1), name='next_ids', dtype='float32')
            previous_ids = Input(shape=(None, 1),
                                 name='previous_ids',
                                 dtype='float32')
        re_lstm_inputs = Lambda(function=ELMo_obj.reverse)(lstm_inputs)
        mask = Lambda(function=ELMo_obj.reverse)(drop_inputs)

        for i in range(self.parameters['n_lstm_layers']):
            if self.parameters['cuDNN']:
                lstm = CuDNNLSTM(
                    units=self.parameters['lstm_units_size'],
                    return_sequences=True,
                    kernel_constraint=MinMaxNorm(
                        -1 * self.parameters['cell_clip'],
                        self.parameters['cell_clip']),
                    recurrent_constraint=MinMaxNorm(
                        -1 * self.parameters['cell_clip'],
                        self.parameters['cell_clip']))(lstm_inputs)
            else:
                lstm = LSTM(units=self.parameters['lstm_units_size'],
                            return_sequences=True,
                            activation="tanh",
                            recurrent_activation='sigmoid',
                            kernel_constraint=MinMaxNorm(
                                -1 * self.parameters['cell_clip'],
                                self.parameters['cell_clip']),
                            recurrent_constraint=MinMaxNorm(
                                -1 * self.parameters['cell_clip'],
                                self.parameters['cell_clip']))(lstm_inputs)
            lstm = Camouflage(mask_value=0)(inputs=[lstm, drop_inputs])
            proj = TimeDistributed(
                Dense(self.parameters['hidden_units_size'],
                      activation='linear',
                      kernel_constraint=MinMaxNorm(
                          -1 * self.parameters['proj_clip'],
                          self.parameters['proj_clip'])))(lstm)
            lstm_inputs = add([proj, lstm_inputs],
                              name='f_block_{}'.format(i + 1))
            lstm_inputs = SpatialDropout1D(
                self.parameters['dropout_rate'])(lstm_inputs)

        for i in range(self.parameters['n_lstm_layers']):
            if self.parameters['cuDNN']:
                re_lstm = CuDNNLSTM(
                    units=self.parameters['lstm_units_size'],
                    return_sequences=True,
                    kernel_constraint=MinMaxNorm(
                        -1 * self.parameters['cell_clip'],
                        self.parameters['cell_clip']),
                    recurrent_constraint=MinMaxNorm(
                        -1 * self.parameters['cell_clip'],
                        self.parameters['cell_clip']))(re_lstm_inputs)
            else:
                re_lstm = LSTM(
                    units=self.parameters['lstm_units_size'],
                    return_sequences=True,
                    activation='tanh',
                    recurrent_activation='sigmoid',
                    kernel_constraint=MinMaxNorm(
                        -1 * self.parameters['cell_clip'],
                        self.parameters['cell_clip']),
                    recurrent_constraint=MinMaxNorm(
                        -1 * self.parameters['cell_clip'],
                        self.parameters['cell_clip']))(re_lstm_inputs)
            re_lstm = Camouflage(mask_value=0)(inputs=[re_lstm, mask])
            re_proj = TimeDistributed(
                Dense(self.parameters['hidden_units_size'],
                      activation='linear',
                      kernel_constraint=MinMaxNorm(
                          -1 * self.parameters['proj_clip'],
                          self.parameters['proj_clip'])))(re_lstm)
            re_lstm_inputs = add([re_proj, re_lstm_inputs],
                                 name='b_block_{}'.format(i + 1))
            re_lstm_inputs = SpatialDropout1D(
                self.parameters['dropout_rate'])(re_lstm_inputs)
        re_lstm_inputs = Lambda(function=ELMo_obj.reverse,
                                name="reverse")(re_lstm_inputs)
        sampled_softmax = SampledSoftmax(
            num_classes=self.parameters['vocab_size'],
            num_sampled=int(self.parameters['num_sampled']),
            tied_to=embeddings if self.parameters['weight_tying']
            and self.parameters['token_encoding'] == 'word' else None)
        outputs = sampled_softmax([lstm_inputs, next_ids])
        re_outputs = sampled_softmax([re_lstm_inputs, previous_ids])
        self._model = Model(inputs=[word_inputs, next_ids, previous_ids],
                            outputs=[outputs, re_outputs])
        # self._model.compile(optimizer=Adagrad(lr=self.parameters['lr'], clipvalue=self.parameters['clip_value']), loss=None)
        # if print_summary: self._model.summary()
        self.wrap_multi_elmo_encoder()
Code example #18
def window_lstm(step_back, ts_shape, lr=0.001):
    demand_predictions = []  # array that will contain all predictions
    demand_input = Input(shape=(step_back, 1))
    flatten_lstm_block_1 = lstm_block(demand_input)

    # adding time space and input
    time_space_input = Input(shape=(ts_shape, ))
    dense_ts = Dense(64, name='dense_ts')(time_space_input)

    merge_ts_lstm = concatenate([flatten_lstm_block_1, dense_ts])
    dense_block_1 = dense_block(merge_ts_lstm)

    # generating d_t+1
    d_t_plus_1 = Dense(1,
                       kernel_constraint=MinMaxNorm(min_value=0.0,
                                                    max_value=1.0),
                       name='d_t_plus_1')(dense_block_1)
    demand_predictions.append(d_t_plus_1)

    demand_input_2 = append_demand_input(demand_input, d_t_plus_1)
    flatten_lstm_block_2 = lstm_block(demand_input_2)
    merge_ts_lstm_2 = concatenate([flatten_lstm_block_2, dense_ts])
    dense_block_2 = dense_block(merge_ts_lstm_2)

    # generating d_t+2
    d_t_plus_2 = Dense(1,
                       kernel_constraint=MinMaxNorm(min_value=0.0,
                                                    max_value=1.0),
                       name='d_t_plus_2')(dense_block_2)
    demand_predictions.append(d_t_plus_2)

    # using d_t+2 prediction
    demand_input_3 = append_demand_input(demand_input_2, d_t_plus_2)
    flatten_lstm_block_3 = lstm_block(demand_input_3)
    merge_ts_lstm_3 = concatenate([flatten_lstm_block_3, dense_ts])
    dense_block_3 = dense_block(merge_ts_lstm_3)

    # generating d_t+3
    d_t_plus_3 = Dense(1,
                       kernel_constraint=MinMaxNorm(min_value=0.0,
                                                    max_value=1.0),
                       name='d_t_plus_3')(dense_block_3)
    demand_predictions.append(d_t_plus_3)

    # using d_t+3 prediction
    demand_input_4 = append_demand_input(demand_input_3, d_t_plus_3)
    flatten_lstm_block_4 = lstm_block(demand_input_4)
    merge_ts_lstm_4 = concatenate([flatten_lstm_block_4, dense_ts])
    dense_block_4 = dense_block(merge_ts_lstm_4)

    # generating d_t+4
    d_t_plus_4 = Dense(1,
                       kernel_constraint=MinMaxNorm(min_value=0.0,
                                                    max_value=1.0),
                       name='d_t_plus_4')(dense_block_4)
    demand_predictions.append(d_t_plus_4)

    # using d_t+4 prediction
    demand_input_5 = append_demand_input(demand_input_4, d_t_plus_4)
    flatten_lstm_block_5 = lstm_block(demand_input_5)
    merge_ts_lstm_5 = concatenate([flatten_lstm_block_5, dense_ts])
    dense_block_5 = dense_block(merge_ts_lstm_5)

    # generating d_t+5
    d_t_plus_5 = Dense(1,
                       kernel_constraint=MinMaxNorm(min_value=0.0,
                                                    max_value=1.0),
                       name='d_t_plus_5')(dense_block_5)
    demand_predictions.append(d_t_plus_5)

    model = Model(inputs=[demand_input, time_space_input],
                  outputs=demand_predictions)
    adam = Adam(lr=lr)
    model.compile(optimizer=adam, loss=mean_squared_error, metrics=[rmse])
    return model
Code example #19
 def build(self, input_shape): 
     # initialize weight matrix for each capsule in lower layer
     self.beta = self.add_weight(shape=[1],
                                 initializer=Ones(),
                                 name='beta',
                                 constraint=MinMaxNorm(-0.2, 2.0, 0.8))
     self.built = True
Code example #20
File: ModelCollection.py Project: Kotzly/TCC_EMG
class ModelCollection():

    BEST_MODEL_1 = {'estimator':Classifier('BEST_MODEL_1'),
                                       'param_grid':
                                       {'units':[[200,150, 100], [100,100, 100], [200, 200, 200]],
                                        'input_dim':[1110],
                                        'output_dim':[49],
                                        'activations':['relu', 'selu'],
                                        'regularizers':[None, [l2(l=1e-5), l2(l=1e-5), l2(l=1e-5)]],
                                        'bregularizers':[None, [l2(l=1e-4), l2(l=1e-4), l2(l=1e-4)]],
                                        'initializers':['glorot_normal'],
                                        'constraints':[MinMaxNorm(0,0.5)],
                                        'dropouts':[[0.3, 0.2],[0.4, 0.3]],
                                        'lr':[1e-5, 1e-6],
                                        'loss_func':['categorical_crossentropy'],
                                        'batch_size':[256, 1024],
                                        'epochs':[5000]}}

    NEURAL_NETWORK_3LAYERS_COMPLETE = {'estimator':Classifier('3_LAYER_NN_A'),
                                       'param_grid':
                                       {'units':[[156, 156, 156], [156, 102, 49], [121, 96, 49], [148, 128, 49]],
                                        'input_dim':[156],
                                        'output_dim':[49],
                                        'activations':['relu', 'tanh'],
                                        'regularizers':[None, [l2(l=0.0001), l2(l=0.0001), l2(l=0.0001)]],
                                        'initializers':['glorot_normal', 'glorot_uniform'],
                                        'dropouts':[0.2, 0.3],
                                        'lr':[0.001, 0.0001],
                                        'loss_func':['sparse_categorical_crossentropy', categorical_cubic_hinge, categorical_squared_hinge],
                                        'batch_size':[32, 64, 128],
                                        'epochs':[1000, 1500, 2000]}}

    NEURAL_NETWORK_2LAYERS_COMPLETE = {'estimator':Classifier('2_LAYER_NN_A'),
                                       'param_grid':
                                       {'units':[[156, 156], [156, 49], [106, 49]],
                                        'input_dim':[156],
                                        'output_dim':[49],
                                        'activations':['relu', 'tanh'],
                                        'regularizers':[None, [l2(l=0.0001), l2(l=0.0001)]],
                                        'initializers':['glorot_normal', 'glorot_uniform'],
                                        'dropouts':[0.2, 0.3],
                                        'lr':[0.001, 0.0001],
                                        'loss_func':['sparse_categorical_crossentropy', categorical_cubic_hinge, categorical_squared_hinge],
                                        'batch_size':[32, 64, 128],
                                        'epochs':[1000, 1500, 2000]}}

    NEURAL_NETWORK_3LAYERS_COMPACT = {'estimator':Classifier('3_LAYER_NN_B'),
                                      'param_grid':
                                      {'units':[[156, 156, 156], [156, 102, 49], [148, 128, 49]],
                                       'input_dim':[156],
                                       'output_dim':[49],
                                       'activations':['relu'],
                                       'regularizers':[None],
                                       'initializers':['glorot_normal', 'glorot_uniform'],
                                       'dropouts':[0.2, 0.3],
                                       'lr':[0.0001],
                                       'loss_func':['sparse_categorical_crossentropy'],
                                       'batch_size':[64, 128],
                                       'epochs':[1000]}}

    NEURAL_NETWORK_2LAYERS_COMPACT = {'estimator':Classifier('2_LAYER_NN_B'),
                                      'param_grid':
                                      {'units':[[156, 156], [156, 49], [106, 49]],
                                       'input_dim':[156],
                                       'output_dim':[49],
                                       'activations':['relu'],
                                       'regularizers':[None],
                                       'initializers':['glorot_normal', 'glorot_uniform'],
                                       'dropouts':[0.2, 0.3],
                                       'lr':[0.0001],
                                       'loss_func':['sparse_categorical_crossentropy'],
                                       'batch_size':[64, 128],
                                       'epochs':[1000]}}
    NEURAL_NETWORK_MINIMALIST_1 = {'estimator':Classifier('1_MINIMALIST'),
                                   'param_grid':
                                   {'units':[[156, 156], [156, 49], [106, 49]],
                                    'input_dim':[156],
                                    'output_dim':[49],
                                    'activations':['relu'],
                                    'regularizers':[None],
                                    'initializers':['glorot_normal', 'glorot_uniform'],
                                    'dropouts':[0.2, 0.3],
                                    'lr':[0.0001],
                                    'loss_func':['sparse_categorical_crossentropy'],
                                    'batch_size':[64, 128],
                                    'epochs':[1000]}}

    SVC_RBF = {'estimator':SVC(),
               'param_grid':
               {'C':[2**i for i in range(1, 8)],
                'gamma':[2**-i for i in range(4, 16)],
                'kernel':['rbf']}}

    SVC_LINEAR = {'estimator':SVC(),
                  'param_grid':
                  {'C':[2**i for i in range(1, 8)],
                   'gamma':[2**-i for i in range(4, 16)],
                   'kernel':['linear']}}

    SVC_SIG = {'estimator':SVC(),
               'param_grid':
               {'C':[2**i for i in range(0, 7)],
                'coef0':[np.linspace(-5, 5, 11)],
                'gamma':[2**-i for i in range(5, 15)],
                'kernel':['sigmoid'],
                'class_weight':['balanced', None]}}

    SVC_POLY = {'estimator':SVC(),
                'param_grid':
                {'C':[2**i for i in range(0, 7)],
                 'coef0':[np.linspace(-5, 5, 11)],
                 'gamma':[2**-i for i in range(5, 15)],
                 'degree':[1, 2, 3, 4],
                 'kernel':['poly'],
                 'class_weight':['balanced', None]}}

    KNN = {'estimator':KNeighborsClassifier(),
           'param_grid':
           {'n_neighbors':[1, 3, 7, 15, 30, 60, 120, 250, 500],
            'weights':['uniform', 'distance'],
            'p':[1, 2],
            'metric':['minkowski']}}

    LOGISTIC_REGRESSION = {'estimator':LogisticRegression(),
                           'param_grid':
                           {'C':[i for i in range(1, 50, 2)],
                            'class_weight':[None, 'balanced'],
                            'solver':['newton-cg', 'saga', 'lbfgs', 'sag'],
                            'multi_class':['ovr', 'multinomial'],
                            'l1_ratio':[0, 0.2, 0.5, 0.8, 1]}}
Code example #21
 def __call__(self, w):
     return MinMaxNorm(self.min_value, self.max_value, self.penalty)(w)
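The __call__ above only shows the delegation; a hedged sketch of the wrapper class it implies (the class name and the get_config method are assumptions; penalty is forwarded as MinMaxNorm's rate argument):

from keras.constraints import Constraint, MinMaxNorm

class MinMaxNormWrapper(Constraint):  # hypothetical name for the wrapper shown above
    def __init__(self, min_value=0.0, max_value=1.0, penalty=1.0):
        self.min_value = min_value
        self.max_value = max_value
        self.penalty = penalty

    def __call__(self, w):
        # Delegate to a freshly constructed MinMaxNorm; penalty plays the role of rate.
        return MinMaxNorm(self.min_value, self.max_value, self.penalty)(w)

    def get_config(self):
        # Lets the constraint be serialized and re-created with the model.
        return {'min_value': self.min_value,
                'max_value': self.max_value,
                'penalty': self.penalty}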
Code example #22
    def compile_elmo(self, print_summary=False):
        """
        Compiles a Language Model RNN based on the given parameters
        """

        # Either character embeddings or word embeddings can be chosen as the input to the encoding step
        if self.parameters['token_encoding'] == 'word':
            # Train word embeddings from scratch
            word_inputs = Input(shape=(None, ),
                                name='word_indices',
                                dtype='int32')
            embeddings = Embedding(self.parameters['vocab_size'],
                                   self.parameters['hidden_units_size'],
                                   trainable=True,
                                   name='token_encoding')
            inputs = embeddings(word_inputs)

            # Token embeddings for Input
            # SpatialDropout1D randomly zeroes whole feature channels; see
            # https://blog.csdn.net/weixin_43896398/article/details/84762943
            drop_inputs = SpatialDropout1D(
                self.parameters['dropout_rate'])(inputs)
            lstm_inputs = TimestepDropout(
                self.parameters['word_dropout_rate'])(drop_inputs)

            # Pass outputs as inputs to apply sampled softmax
            next_ids = Input(shape=(None, 1), name='next_ids', dtype='float32')
            previous_ids = Input(shape=(None, 1),
                                 name='previous_ids',
                                 dtype='float32')

        elif self.parameters['token_encoding'] == 'char':
            # Train character-level representation
            word_inputs = Input(shape=(
                None,
                self.parameters['token_maxlen'],
            ),
                                dtype='int32',
                                name='char_indices')
            # Character-level embeddings followed by convolution
            inputs = self.char_level_token_encoder()(word_inputs)

            # Token embeddings for Input, then apply dropout
            drop_inputs = SpatialDropout1D(
                self.parameters['dropout_rate'])(inputs)
            lstm_inputs = TimestepDropout(
                self.parameters['word_dropout_rate'])(drop_inputs)

            # Pass outputs as inputs to apply sampled softmax
            next_ids = Input(shape=(None, 1), name='next_ids', dtype='float32')
            previous_ids = Input(shape=(None, 1),
                                 name='previous_ids',
                                 dtype='float32')

        # Reversed input for backward LSTMs: reverse the sequence so the backward LSTMs below can be built
        re_lstm_inputs = Lambda(function=ELMo.reverse)(lstm_inputs)
        mask = Lambda(function=ELMo.reverse)(drop_inputs)  # reverse the mask as well

        # Forward LSTMs
        for i in range(self.parameters['n_lstm_layers']):
            # CuDNNLSTM is the GPU-accelerated LSTM kernel; either it or the plain LSTM can be used
            if self.parameters['cuDNN']:
                lstm = CuDNNLSTM(
                    units=self.parameters['lstm_units_size'],
                    return_sequences=True,
                    kernel_constraint=MinMaxNorm(
                        -1 * self.parameters['cell_clip'],
                        self.parameters['cell_clip']),
                    recurrent_constraint=MinMaxNorm(
                        -1 * self.parameters['cell_clip'],
                        self.parameters['cell_clip']))(lstm_inputs)
            else:
                lstm = LSTM(units=self.parameters['lstm_units_size'],
                            return_sequences=True,
                            activation="tanh",
                            recurrent_activation='sigmoid',
                            kernel_constraint=MinMaxNorm(
                                -1 * self.parameters['cell_clip'],
                                self.parameters['cell_clip']),
                            recurrent_constraint=MinMaxNorm(
                                -1 * self.parameters['cell_clip'],
                                self.parameters['cell_clip']))(lstm_inputs)

            lstm = Camouflage(mask_value=0)(inputs=[lstm, drop_inputs])
            # Projection to hidden_units_size: a Dense layer applied to the LSTM output
            proj = TimeDistributed(
                Dense(self.parameters['hidden_units_size'],
                      activation='linear',
                      kernel_constraint=MinMaxNorm(
                          -1 * self.parameters['proj_clip'],
                          self.parameters['proj_clip'])))(lstm)
            # Merge Bi-LSTM feature vectors with the previous ones (add proj to lstm_inputs)
            lstm_inputs = add([proj, lstm_inputs],
                              name='f_block_{}'.format(i + 1))
            # Apply variational drop-out between BI-LSTM layers before feeding the next layer
            lstm_inputs = SpatialDropout1D(
                self.parameters['dropout_rate'])(lstm_inputs)

        # Backward LSTMs
        for i in range(self.parameters['n_lstm_layers']):
            if self.parameters['cuDNN']:
                re_lstm = CuDNNLSTM(
                    units=self.parameters['lstm_units_size'],
                    return_sequences=True,
                    kernel_constraint=MinMaxNorm(
                        -1 * self.parameters['cell_clip'],
                        self.parameters['cell_clip']),
                    recurrent_constraint=MinMaxNorm(
                        -1 * self.parameters['cell_clip'],
                        self.parameters['cell_clip']))(re_lstm_inputs)
            else:
                re_lstm = LSTM(
                    units=self.parameters['lstm_units_size'],
                    return_sequences=True,
                    activation='tanh',
                    recurrent_activation='sigmoid',
                    kernel_constraint=MinMaxNorm(
                        -1 * self.parameters['cell_clip'],
                        self.parameters['cell_clip']),
                    recurrent_constraint=MinMaxNorm(
                        -1 * self.parameters['cell_clip'],
                        self.parameters['cell_clip']))(re_lstm_inputs)

            re_lstm = Camouflage(mask_value=0)(inputs=[re_lstm, mask])
            # Projection to hidden_units_size
            re_proj = TimeDistributed(
                Dense(self.parameters['hidden_units_size'],
                      activation='linear',
                      kernel_constraint=MinMaxNorm(
                          -1 * self.parameters['proj_clip'],
                          self.parameters['proj_clip'])))(re_lstm)

            # Merge Bi-LSTM feature vectors with the previous ones (add re_proj to re_lstm_inputs)
            re_lstm_inputs = add([re_proj, re_lstm_inputs],
                                 name='b_block_{}'.format(i + 1))

            # Apply variational drop-out between BI-LSTM layers (backward stack)
            re_lstm_inputs = SpatialDropout1D(
                self.parameters['dropout_rate'])(re_lstm_inputs)

        # Reverse the backward LSTMs' outputs to make them forward again
        re_lstm_inputs = Lambda(function=ELMo.reverse,
                                name="reverse")(re_lstm_inputs)

        # Project to Vocabulary with Sampled Softmax
        sampled_softmax = SampledSoftmax(
            num_classes=self.parameters['vocab_size'],
            num_sampled=int(self.parameters['num_sampled']),
            tied_to=embeddings if self.parameters['weight_tying'] else None)

        # The forward LSTM predicts the next token at each step; the backward LSTM predicts the previous token
        outputs = sampled_softmax([lstm_inputs, next_ids])
        re_outputs = sampled_softmax([re_lstm_inputs, previous_ids])

        self._model = Model(inputs=[word_inputs, next_ids, previous_ids],
                            outputs=[outputs, re_outputs])  # forward and backward outputs
        self._model.compile(optimizer=Adagrad(
            lr=self.parameters['lr'], clipvalue=self.parameters['clip_value']),
                            loss=None)
        if print_summary:
            self._model.summary()
Code example #23
    def compile_elmo(self, print_summary=False):
        """
        Compiles a Language Model RNN based on the given parameters
        """

        if self.parameters['token_encoding'] == 'word':
            # Train word embeddings from scratch
            word_inputs = Input(shape=(None, ),
                                name='word_indices',
                                dtype='int32')
            embeddings = Embedding(self.parameters['vocab_size'],
                                   self.parameters['hidden_units_size'],
                                   trainable=True,
                                   name='token_encoding')
            inputs = embeddings(word_inputs)

            # Token embeddings for Input
            drop_inputs = SpatialDropout1D(
                self.parameters['dropout_rate'])(inputs)
            lstm_inputs = TimestepDropout(
                self.parameters['word_dropout_rate'])(drop_inputs)

            # Pass outputs as inputs to apply sampled softmax
            next_ids = Input(shape=(None, 1), name='next_ids', dtype='float32')
            previous_ids = Input(shape=(None, 1),
                                 name='previous_ids',
                                 dtype='float32')
        elif self.parameters['token_encoding'] == 'char':
            # Train character-level representation
            word_inputs = Input(shape=(
                None,
                self.parameters['token_maxlen'],
            ),
                                dtype='int32',
                                name='char_indices')
            inputs = self.char_level_token_encoder()(word_inputs)

            # Token embeddings for Input
            drop_inputs = SpatialDropout1D(
                self.parameters['dropout_rate'])(inputs)
            lstm_inputs = TimestepDropout(
                self.parameters['word_dropout_rate'])(drop_inputs)

            # Pass outputs as inputs to apply sampled softmax
            next_ids = Input(shape=(None, 1), name='next_ids', dtype='float32')
            previous_ids = Input(shape=(None, 1),
                                 name='previous_ids',
                                 dtype='float32')

        # Reversed input for backward LSTMs
        re_lstm_inputs = Lambda(function=ELMo.reverse)(lstm_inputs)
        mask = Lambda(function=ELMo.reverse)(drop_inputs)

        # Forward LSTMs
        for i in range(self.parameters['n_lstm_layers']):
            if self.parameters['cuDNN']:
                lstm = CuDNNLSTM(
                    units=self.parameters['lstm_units_size'],
                    return_sequences=True,
                    kernel_constraint=MinMaxNorm(
                        -1 * self.parameters['cell_clip'],
                        self.parameters['cell_clip']),
                    recurrent_constraint=MinMaxNorm(
                        -1 * self.parameters['cell_clip'],
                        self.parameters['cell_clip']))(lstm_inputs)
            else:
                lstm = LSTM(units=self.parameters['lstm_units_size'],
                            return_sequences=True,
                            activation="tanh",
                            recurrent_activation='sigmoid',
                            kernel_constraint=MinMaxNorm(
                                -1 * self.parameters['cell_clip'],
                                self.parameters['cell_clip']),
                            recurrent_constraint=MinMaxNorm(
                                -1 * self.parameters['cell_clip'],
                                self.parameters['cell_clip']))(lstm_inputs)
            lstm = Camouflage(mask_value=0)(inputs=[lstm, drop_inputs])
            # Projection to hidden_units_size
            proj = TimeDistributed(
                Dense(self.parameters['hidden_units_size'],
                      activation='linear',
                      kernel_constraint=MinMaxNorm(
                          -1 * self.parameters['proj_clip'],
                          self.parameters['proj_clip'])))(lstm)
            # Merge Bi-LSTMs feature vectors with the previous ones
            lstm_inputs = add([proj, lstm_inputs],
                              name='f_block_{}'.format(i + 1))
            # Apply variational drop-out between BI-LSTM layers
            lstm_inputs = SpatialDropout1D(
                self.parameters['dropout_rate'])(lstm_inputs)

        # Backward LSTMs
        for i in range(self.parameters['n_lstm_layers']):
            if self.parameters['cuDNN']:
                re_lstm = CuDNNLSTM(
                    units=self.parameters['lstm_units_size'],
                    return_sequences=True,
                    kernel_constraint=MinMaxNorm(
                        -1 * self.parameters['cell_clip'],
                        self.parameters['cell_clip']),
                    recurrent_constraint=MinMaxNorm(
                        -1 * self.parameters['cell_clip'],
                        self.parameters['cell_clip']))(re_lstm_inputs)
            else:
                re_lstm = LSTM(
                    units=self.parameters['lstm_units_size'],
                    return_sequences=True,
                    activation='tanh',
                    recurrent_activation='sigmoid',
                    kernel_constraint=MinMaxNorm(
                        -1 * self.parameters['cell_clip'],
                        self.parameters['cell_clip']),
                    recurrent_constraint=MinMaxNorm(
                        -1 * self.parameters['cell_clip'],
                        self.parameters['cell_clip']))(re_lstm_inputs)
            re_lstm = Camouflage(mask_value=0)(inputs=[re_lstm, mask])
            # Projection to hidden_units_size
            re_proj = TimeDistributed(
                Dense(self.parameters['hidden_units_size'],
                      activation='linear',
                      kernel_constraint=MinMaxNorm(
                          -1 * self.parameters['proj_clip'],
                          self.parameters['proj_clip'])))(re_lstm)
            # Merge Bi-LSTMs feature vectors with the previous ones
            re_lstm_inputs = add([re_proj, re_lstm_inputs],
                                 name='b_block_{}'.format(i + 1))
            # Apply variational drop-out between BI-LSTM layers
            re_lstm_inputs = SpatialDropout1D(
                self.parameters['dropout_rate'])(re_lstm_inputs)

        # Reverse backward LSTMs' outputs = Make it forward again
        re_lstm_inputs = Lambda(function=ELMo.reverse,
                                name="reverse")(re_lstm_inputs)

        # Project to Vocabulary with Sampled Softmax
        sampled_softmax = SampledSoftmax(
            num_classes=self.parameters['vocab_size'],
            num_sampled=int(self.parameters['num_sampled']),
            tied_to=embeddings if self.parameters['weight_tying']
            and self.parameters['token_encoding'] == 'word' else None)
        outputs = sampled_softmax([lstm_inputs, next_ids])
        re_outputs = sampled_softmax([re_lstm_inputs, previous_ids])

        self._model = Model(inputs=[word_inputs, next_ids, previous_ids],
                            outputs=[outputs, re_outputs])
        self._model.compile(optimizer=Adagrad(
            lr=self.parameters['lr'], clipvalue=self.parameters['clip_value']),
                            loss=None)
        if print_summary:
            self._model.summary()
Code example #24
 def build(self, input_shape): 
     # initialize weight matrix for each capsule in lower layer
     self.beta = self.add_weight(shape=list(input_shape)[1:],
                                 name='beta',
                                 initializer=Ones(),
                                 constraint=MinMaxNorm(-0.1, 2.0, 0.8),
                                 trainable=True)
     self.built = True
Code example #25
File: MNIST.py Project: mahdiehAbb/MNIST
num_classes = 10
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)


# model = Sequential()
# # The input layer requires the special input_shape parameter which should match
# # the shape of our training data.
# # MinMaxNorm(min_value=0.0, max_value=1.0, rate=1.0, axis=0
# model.add(Dense(units=4, activation='sigmoid', input_shape=(image_size,), use_bias=False, kernel_constraint=MinMaxNorm(min_value=-1.0, max_value=1.0, rate=1.0, axis=0)))
# model.add(Dense(units=num_classes, activation='softmax', use_bias=False, kernel_constraint=MinMaxNorm(min_value=-1.0, max_value=1.0, rate=1.0, axis=0)))
# model.summary()

inputs = Input(shape=(784,), name='img')

dense_1 = Dense(90, activation='relu', use_bias=False, kernel_constraint=MinMaxNorm(min_value=-1.0, max_value=1.0, rate=1.0, axis=0))
intermediate_output = dense_1(inputs)
dense_2 = Dense(60, activation='relu', use_bias=False, kernel_constraint=MinMaxNorm(min_value=-1.0, max_value=1.0, rate=1.0, axis=0))
intermediate_output = dense_2(intermediate_output)

dense = Dense(num_classes, activation="softmax", use_bias=False, kernel_constraint=MinMaxNorm(min_value=-1.0, max_value=1.0, rate=1.0, axis=0))
outputs = dense(intermediate_output)

intermediate_model = Model(inputs=inputs, outputs=intermediate_output)
model = Model(inputs=inputs, outputs=outputs, name='mnist_model')
model.summary()


logger = keras.callbacks.ProgbarLogger(count_mode='samples', stateful_metrics=None)
model.compile(optimizer="sgd", loss='categorical_crossentropy', metrics=['accuracy'])
history = model.fit(x_train, y_train, validation_split=0.1,
                    epochs=10, batch_size=128)  # remaining fit arguments were truncated in the original snippet; epochs/batch_size assumed
Code example #26
    def _compile_hans(self, shape, n_hidden_layers, hidden_units_size,
                      dropout_rate, word_dropout_rate, lr):
        """
        Compiles a Hierarchical Attention Network based on the given parameters
        :param shape: The shape of the sequence, i.e. (number of sections, number of tokens)
        :param n_hidden_layers: number of stacked BI-GRU layers at the word and section level, as a list
        :param hidden_units_size: size of hidden units, as a list
        :param dropout_rate: The percentage of inputs to dropout
        :param word_dropout_rate: The percentage of timesteps to dropout
        :param lr: learning rate
        :return: Nothing
        """

        # Sentence Feature Representation
        section_inputs = Input(shape=(None, ), name='document_inputs')
        self.pretrained_embeddings = self.PretrainedEmbedding()
        section_embs = self.pretrained_embeddings(section_inputs)

        # Apply variational dropout
        drop_section_embs = SpatialDropout1D(
            dropout_rate, name='feature_dropout')(section_embs)
        encodings = TimestepDropout(word_dropout_rate,
                                    name='word_dropout')(drop_section_embs)

        # Bi-GRUs over token embeddings
        for i in range(n_hidden_layers[0]):
            if self._cuDNN:
                grus = Bidirectional(
                    CuDNNGRU(hidden_units_size[0],
                             return_sequences=True,
                             kernel_constraint=MinMaxNorm(min_value=-2,
                                                          max_value=2)),
                    name='bidirectional_grus_{}'.format(i))(encodings)
            else:
                grus = Bidirectional(
                    GRU(hidden_units_size[0],
                        activation="tanh",
                        recurrent_activation='sigmoid',
                        return_sequences=True,
                        kernel_constraint=MinMaxNorm(min_value=-2,
                                                     max_value=2)),
                    name='bidirectional_grus_{}'.format(i))(encodings)
            grus = Camouflage(mask_value=0.0)([grus, encodings])
            if i == 0:
                encodings = SpatialDropout1D(dropout_rate)(grus)
            else:
                encodings = add([grus, encodings])
                encodings = SpatialDropout1D(dropout_rate)(encodings)

        # Attention over BI-GRU (context-aware) embeddings
        if self._attention_mechanism == 'maxpooling':
            section_encoder = GlobalMaxPooling1D()(encodings)
        elif self._attention_mechanism == 'attention':
            encodings = SymmetricMasking()([encodings, encodings])
            section_encoder = ContextualAttention(
                kernel_regularizer=l2(), bias_regularizer=l2())(encodings)

        # Wrap up section_encoder
        section_encoder = Model(inputs=section_inputs,
                                outputs=section_encoder,
                                name='sentence_encoder')

        # Document Input Layer
        document_inputs = Input(shape=(
            shape[0],
            shape[1],
        ),
                                name='document_inputs')

        # Distribute sentences
        section_encodings = TimeDistributed(
            section_encoder, name='sentence_encodings')(document_inputs)

        # BI-GRUs over section embeddings
        for i in range(n_hidden_layers[1]):
            if self._cuDNN:
                grus = Bidirectional(
                    CuDNNGRU(hidden_units_size[1],
                             return_sequences=True,
                             kernel_constraint=MinMaxNorm(min_value=-2,
                                                          max_value=2)),
                    name='bidirectional_grus_upper_{}'.format(i))(
                        section_encodings)
            else:
                grus = Bidirectional(GRU(hidden_units_size[1],
                                         activation="tanh",
                                         recurrent_activation='sigmoid',
                                         return_sequences=True,
                                         kernel_constraint=MinMaxNorm(
                                             min_value=-2, max_value=2)),
                                     name='bidirectional_grus_upper_{}'.format(
                                         i))(section_encodings)
            grus = Camouflage(mask_value=0.0)([grus, section_encodings])
            if i == 0:
                section_encodings = SpatialDropout1D(dropout_rate)(grus)
            else:
                section_encodings = add([grus, section_encodings])
                section_encodings = SpatialDropout1D(dropout_rate)(
                    section_encodings)

        # Attention over BI-GRU (context-aware) sentence embeddings
        if self._attention_mechanism == 'maxpooling':
            doc_encoding = GlobalMaxPooling1D(
                name='max_pooling')(section_encodings)
        elif self._attention_mechanism == 'attention':
            section_encodings = SymmetricMasking()(
                [section_encodings, section_encodings])
            doc_encoding = ContextualAttention(
                kernel_regularizer=l2(),
                bias_regularizer=l2(),
                name='self_attention')(section_encodings)
        losses = 'binary_crossentropy' if self._decision_type == 'multi_label' else 'categorical_crossentropy'
        loss_weights = None

        # Final output (projection) layer
        outputs = Dense(self.n_classes,
                        activation='sigmoid'
                        if self._decision_type == 'multi_label' else 'softmax',
                        name='outputs')(doc_encoding)

        # Wrap up model + Compile with optimizer and loss function
        self.model = Model(inputs=document_inputs, outputs=[outputs])
        self.model.compile(optimizer=Adam(lr=lr, clipvalue=2.0),
                           loss=losses,
                           loss_weights=loss_weights)