def build(self, input_shape):
    self.N_offset = self.add_weight(shape=(self.n_res,),
                                    initializer=Constant(0.005),
                                    constraint=MinMaxNorm(min_value=self.N_offset_lower,
                                                          max_value=self.N_offset_upper,
                                                          rate=1.0),
                                    trainable=True)
    self.H_offset = self.add_weight(shape=(self.n_res,),
                                    initializer=Constant(0.001),
                                    constraint=MinMaxNorm(min_value=self.H_offset_lower,
                                                          max_value=self.H_offset_upper,
                                                          rate=1.0),
                                    trainable=True)
def build(self, input_shape):
    # Create the trainable weight variables for this layer.
    channels = input_shape[3]
    channels2 = K.int_shape(self.input_h)[3]
    self.channels = channels
    self.w = input_shape[1]
    self.f = self.add_weight(name='f',
                             shape=(1, 1, channels2, channels // self.k),
                             initializer='uniform',
                             trainable=True)
    self.g = self.add_weight(name='g',
                             shape=(1, 1, channels, channels // self.k),
                             initializer='uniform',
                             trainable=True)
    if self.mix_concat == 'mix':
        self.gamma = self.add_weight(name='gamma',
                                     shape=(1,),
                                     initializer='uniform',
                                     trainable=True)
    elif self.mix_concat == 'weighted_mix':
        self.gamma = self.add_weight(name='gamma',
                                     shape=(1,),
                                     initializer='uniform',
                                     trainable=True,
                                     constraint=MinMaxNorm(min_value=0.0, max_value=1.0))
    super(AttentionLayer, self).build(input_shape)  # Be sure to call this at the end
def simple_dense():
    """Creates a simple sequential model with three dense layers."""
    model = Sequential()
    model.add(Dense(units=32,
                    input_shape=(32,),
                    use_bias=True,
                    bias_constraint=MinMaxNorm(min_value=-1, max_value=1, rate=1.0, axis=0),
                    bias_initializer=glorot_normal(seed=32),
                    kernel_constraint=MaxNorm(max_value=1.5),
                    kernel_initializer=glorot_uniform(seed=45)))
    model.add(Activation('relu'))
    model.add(Dense(units=32,
                    activation='tanh',
                    use_bias=False,
                    activity_regularizer=l1_l2(l1=0.05, l2=0.05),
                    kernel_constraint=MaxNorm(max_value=1.5),
                    kernel_initializer=glorot_uniform(seed=45)))
    model.add(Dense(units=10,
                    activation='softmax',
                    use_bias=False,
                    activity_regularizer=l1_l2(l1=0.05, l2=0.05),
                    kernel_constraint=MaxNorm(max_value=1.5),
                    kernel_initializer=glorot_uniform(seed=45)))
    return model
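# Illustrative usage sketch for simple_dense() above; not part of the original
# snippet. The data is random, and it assumes the same Keras imports
# (Sequential, Dense, constraints, initializers) are already in scope.
import numpy as np

model = simple_dense()
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

x = np.random.rand(128, 32).astype("float32")        # 32 input features
y = np.eye(10)[np.random.randint(0, 10, size=128)]   # one-hot targets for 10 classes
model.fit(x, y, epochs=2, batch_size=32, verbose=0)
# After every update, MaxNorm rescales each kernel column to norm <= 1.5, and
# MinMaxNorm clips the first layer's bias-vector norm to at most 1 (the negative
# lower bound has no effect, since a norm is never negative).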
def make_discriminator():
    mmn = MinMaxNorm(min_value=-.01, max_value=.01)
    model = Sequential()
    model.add(Conv2D(conv_scale, kernel_size, padding="same", kernel_constraint=mmn))
    model.add(LeakyReLU(alpha=.2))
    model.add(Conv2D(2 * conv_scale, kernel_size, padding="same", kernel_constraint=mmn))
    model.add(LeakyReLU(alpha=.2))
    model.add(Conv2D(2 * conv_scale, kernel_size, padding="same", kernel_constraint=mmn))
    model.add(BatchNormalization(momentum=.95))
    model.add(LeakyReLU(alpha=.2))
    # model.add(Conv2D(2 * conv_scale, kernel_size, padding="same"))
    # model.add(BatchNormalization(momentum=.8))
    # model.add(LeakyReLU(alpha=.2))
    model.add(Flatten())
    model.add(Dense(1, activation="linear"))
    model.compile(optimizer=RMSprop(lr=.00005),
                  loss=binary_crossentropy,
                  metrics=["accuracy"])
    return model
def create_trainable_wasserstein(self, nb_event, nb_type, nb_feature, wgan_clip=1.):
    from keras.layers import Input, Dense, Flatten, Convolution2D, Activation, Dropout, merge
    from keras.models import Model
    from keras.constraints import MinMaxNorm

    constraint = MinMaxNorm(min_value=-wgan_clip, max_value=wgan_clip)
    x = Input(batch_shape=(1, nb_event, nb_type, nb_feature), dtype='float')
    y = Convolution2D(128,
                      kernel_size=[nb_event - 10 + 1, 1],
                      strides=(2, 1),
                      activation='relu',
                      kernel_constraint=constraint,
                      bias_constraint=constraint)(x)
    y = Dropout(0.5)(y)
    y = Convolution2D(128,
                      kernel_size=[3, nb_type],
                      activation='relu',
                      kernel_constraint=constraint,
                      bias_constraint=constraint)(y)
    y = Dropout(0.5)(y)
    y = Flatten()(y)
    y = Dense(2,
              activation=None,
              kernel_constraint=constraint,
              bias_constraint=constraint)(y)
    model = Model(inputs=[x], outputs=[y], name='dis_output')
    self.model = model
    return model
def create_model(self, clear_session=False):
    if clear_session:
        K.clear_session()
    set_random_seed(666)
    seed(self.seed)
    model = Sequential()
    activation = self.model_kwargs["activation"]
    max_norm = self.model_kwargs["max_norm"]
    n_layers = self.model_kwargs["n_layers"]
    d = self.model_kwargs["dropouts"]
    units = self.model_kwargs["units"]
    rk = self.model_kwargs["rk"]
    rb = self.model_kwargs["rb"]
    lr = self.model_kwargs["lr"]
    model.add(Dense(input_shape=(self.input_dim,),
                    units=units[0],
                    kernel_initializer="glorot_normal",
                    kernel_regularizer=l2(rk[0]),
                    kernel_constraint=MinMaxNorm(0, max_norm),
                    bias_regularizer=l2(rb[0]),
                    activation=activation))
    model.add(Dropout(d[0]))
    for i in range(n_layers - 1):
        model.add(Dense(units=units[i + 1],
                        kernel_initializer="glorot_normal",
                        kernel_regularizer=l2(rk[i + 1]),
                        bias_regularizer=l2(rb[i + 1]),
                        kernel_constraint=MinMaxNorm(0, max_norm),
                        activation=activation))
        model.add(Dropout(d[i + 1]))
    model.add(Dense(units=self.output_dim,
                    kernel_initializer="glorot_normal",
                    kernel_regularizer=l2(rk[-1]),
                    bias_regularizer=l2(rb[-1]),
                    kernel_constraint=MinMaxNorm(0, max_norm),
                    activation="softmax"))
    optimizer = Adam(lr=lr, clipnorm=.2)
    model.compile(loss="categorical_crossentropy",
                  optimizer=optimizer,
                  metrics=["acc"])
    self.model = model
def build(self, input_shape):
    self.cs_noise = self.add_weight(
        shape=(1,),
        initializer=Constant(self.larmor / 250),
        # RandomUniform(minval=self.larmor/5000, maxval=self.larmor/500),
        constraint=MinMaxNorm(min_value=self.cs_noise_lower,
                              max_value=self.cs_noise_upper,
                              rate=1.0),
        trainable=True)
def build(self, input_shape):
    init_I_mean = float(tf.math.reduce_mean(self.init_I))
    self.I_noise = self.add_weight(
        shape=(1,),
        initializer=Constant(init_I_mean / 20.0),
        # RandomUniform(minval=init_I_mean/50, maxval=init_I_mean/4),
        constraint=MinMaxNorm(min_value=self.I_noise_lower,
                              max_value=self.I_noise_upper,
                              rate=1.0),
        trainable=True)
def build(self, input_shape):
    assert isinstance(input_shape, list)
    # Create a trainable weight variable for this layer.
    self.k = self.add_weight(name='k',
                             shape=[1],
                             initializer=self.initializer,
                             trainable=True,
                             constraint=MinMaxNorm(min_value=-2.0,
                                                   max_value=2.0,
                                                   rate=0.8))
    # Be sure to call this at the end
    super(VariableScaling, self).build(input_shape)
def build(self, input_shape):
    self.Kd_exp = self.add_weight(
        name='Kd_exp',
        shape=(1,),
        # initializer=Constant(K.log(Kd)/K.log(10.0)),
        initializer=RandomUniform(minval=self.Kd_exp_lower,
                                  maxval=self.Kd_exp_upper),
        constraint=MinMaxNorm(min_value=self.Kd_exp_lower,
                              max_value=self.Kd_exp_upper,
                              rate=1.0),
        trainable=True)
    self.koff_exp = self.add_weight(
        name='koff_exp',
        shape=(1,),
        # initializer=Constant(K.log(koff)/K.log(10.0)),
        initializer=RandomUniform(minval=self.koff_exp_lower,
                                  maxval=self.koff_exp_upper),
        constraint=MinMaxNorm(min_value=self.koff_exp_lower,
                              max_value=self.koff_exp_upper,
                              rate=1.0),
        trainable=True)
def build(self, input_shape):
    init_I_mean = tf.math.reduce_mean(self.init_I)
    init_I_std = tf.math.reduce_std(self.init_I)
    self.I_offset = self.add_weight(name='ref_I',
                                    shape=(self.n_res,),
                                    initializer=Constant(0.001),
                                    constraint=MinMaxNorm(min_value=self.I_offset_lower,
                                                          max_value=self.I_offset_upper,
                                                          rate=1.0),
                                    trainable=True)
    self.dR2 = self.add_weight(
        shape=(1,),
        # initializer=Constant(dR2),
        initializer=RandomUniform(minval=self.dR2_lower, maxval=self.dR2_upper),
        constraint=MinMaxNorm(min_value=self.dR2_lower,
                              max_value=self.dR2_upper,
                              rate=1.0),
        trainable=True)
    self.amp_scaler = self.add_weight(
        name='amp_scaler',
        shape=(1,),
        # initializer=Constant(amp_scaler),
        initializer=RandomNormal(mean=float(5 * init_I_mean),
                                 stddev=float(5 * init_I_std)),
        constraint=MinMaxNorm(min_value=self.amp_scaler_lower,
                              max_value=self.amp_scaler_upper),
        trainable=True)
    self.delta_w = self.add_weight(name='delta_w',
                                   shape=(self.n_res,),
                                   initializer=Constant(self.larmor / 100),
                                   constraint=MinMaxNorm(min_value=self.delta_w_lower,
                                                         max_value=self.delta_w_upper,
                                                         rate=1.0),
                                   regularizer=L2(1e-2),
                                   trainable=True)
def build(self, input_shape):
    # initialize weight matrix for each capsule in lower layer
    self.rho = self.add_weight(shape=[input_shape[-1]],
                               initializer=Constant(1.0),
                               name='rho',
                               constraint=MinMaxNorm())
    self.gamma = self.add_weight(shape=[input_shape[-1]],
                                 initializer=Constant(1.0),
                                 name='gamma')
    self.beta = self.add_weight(shape=[input_shape[-1]],
                                initializer=Constant(0.0),
                                name='beta')
    self.built = True
def load_model(self, num_layers=10):
    self.add(Dense(units=32,
                   input_shape=(32,),
                   use_bias=True,
                   bias_constraint=MinMaxNorm(min_value=-1, max_value=1, rate=1.0, axis=0),
                   bias_initializer=glorot_normal(seed=32),
                   kernel_constraint=MaxNorm(max_value=1.5),
                   kernel_initializer=glorot_uniform(seed=45)))
    self.add(Dense(units=32,
                   use_bias=True,
                   activation='tanh',
                   bias_constraint=MinMaxNorm(min_value=-1, max_value=1, rate=1.0, axis=0),
                   bias_initializer=glorot_normal(seed=32),
                   kernel_constraint=MaxNorm(max_value=1.5),
                   kernel_initializer=glorot_uniform(seed=45)))
    self.add(Dropout(rate=0.5))
    self.add(Dense(units=10,
                   use_bias=True,
                   activation='softmax',
                   bias_constraint=MinMaxNorm(min_value=-1, max_value=1, rate=1.0, axis=0),
                   bias_initializer=glorot_normal(seed=32),
                   kernel_constraint=MaxNorm(max_value=1.5),
                   kernel_initializer=glorot_uniform(seed=45)))
def build(self, input_shape):
    # initialize weight matrix for each capsule in lower layer
    self.W = self.add_weight(shape=[input_shape[-1]],
                             initializer=Ones(),
                             name='weights',
                             constraint=MinMaxNorm())
    self.latent_size = input_shape[-1]
    # TODO: (local)Conv2D with high stride before dense? This is way too inefficient, no wonder UGATIT is 2G
    input_prod = np.prod(input_shape[1:])
    self.fc_gamma = Dense(input_shape[-1])
    self.fc_gamma.build((None, input_prod))
    self.fc_beta = Dense(input_shape[-1])
    self.fc_beta.build((None, input_prod))
    self.flatten = Flatten()
    self.flatten.build(input_shape)
    self.trainable_weights.extend(self.fc_beta.trainable_weights)
    self.trainable_weights.extend(self.fc_gamma.trainable_weights)
    self.built = True
def return_norm(name, lstm_config, minimum, maximum, logger):
    """Return the constraint object used to norm the weights of the neural network."""
    log_name = name
    name = lstm_config[name.lower()]
    if name == 'maxnorm':
        logger.info("In {} use {} constraint with max={}".format(log_name, name, maximum))
        return MaxNorm(maximum)
    if name == 'nonnegnorm':
        logger.info("In {} use {} constraint ".format(log_name, name))
        return NonNeg()
    if name == 'minmaxnorm':
        logger.info("In {} use {} constraint with min={} and max={}".format(
            log_name, name, minimum, maximum))
        return MinMaxNorm(minimum, maximum)
    else:
        logger.info("None constraint in {}.".format(log_name))
        return None
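# A hypothetical call to return_norm() above; the config layout and logger setup
# are assumptions made for illustration, not part of the original snippet.
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("lstm")

lstm_config = {"kernel_constraint": "minmaxnorm"}   # assumed mapping: option name -> constraint id
constraint = return_norm("Kernel_Constraint", lstm_config,
                         minimum=-1.0, maximum=1.0, logger=logger)
# Logs "In Kernel_Constraint use minmaxnorm constraint with min=-1.0 and max=1.0"
# and returns MinMaxNorm(-1.0, 1.0), ready to be passed as kernel_constraint=...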
def demand_lstm(step_back, ts_shape, y_shape):
    """
    Architecture for the LSTM model.
    :param step_back: number of steps back in time for demand
    :param ts_shape: shape of the time-space vector
    :param y_shape: shape of the target vector
    :return: model
    """
    demand_input = Input(shape=(step_back, 1))
    lstm_layer = LSTM(units=100, activation='tanh', return_sequences=True)(demand_input)
    dropout = Dropout(0.5)(lstm_layer)
    lstm_layer1 = LSTM(units=50, activation='tanh', return_sequences=True)(dropout)
    dropout_1 = Dropout(0.5)(lstm_layer1)
    lstm_layer2 = LSTM(units=25, activation='tanh', return_sequences=True)(dropout_1)
    dropout_2 = Dropout(0.2)(lstm_layer2)
    lstm_layer3 = LSTM(units=10, activation='tanh', return_sequences=True)(dropout_2)
    flatten_lstm3 = Flatten()(lstm_layer3)

    time_space_input = Input(shape=(ts_shape,))
    dense_ts = Dense(64)(time_space_input)
    merge_ts_lstm = concatenate([flatten_lstm3, dense_ts])
    dense_1 = Dense(75)(merge_ts_lstm)
    dense_2 = Dense(25)(dense_1)
    output_dense = Dense(y_shape,
                         kernel_constraint=MinMaxNorm(min_value=0.0, max_value=1.0))(dense_2)

    model = Model(inputs=[demand_input, time_space_input], outputs=output_dense)
    model.compile(optimizer='adam', loss=mean_squared_error, metrics=[rmse])
    print(model.summary())
    return model
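# Hypothetical instantiation of demand_lstm() above: 24 demand lags, a
# 10-dimensional time-space vector and a 5-step target. The numbers are
# illustrative, and the call assumes the rmse metric used in compile() is defined.
model = demand_lstm(step_back=24, ts_shape=10, y_shape=5)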
def compile_elmo(self, print_summary=False):
    if self.parameters['token_encoding'] == 'word':
        word_inputs = Input(shape=(None,), name='word_indices', dtype='int32')
        embeddings = Embedding(self.parameters['vocab_size'],
                               self.parameters['hidden_units_size'],
                               trainable=True, name='token_encoding')
        inputs = embeddings(word_inputs)
        drop_inputs = SpatialDropout1D(self.parameters['dropout_rate'])(inputs)
        lstm_inputs = TimestepDropout(self.parameters['word_dropout_rate'])(drop_inputs)
        next_ids = Input(shape=(None, 1), name='next_ids', dtype='float32')
        previous_ids = Input(shape=(None, 1), name='previous_ids', dtype='float32')
    elif self.parameters['token_encoding'] == 'char':
        word_inputs = Input(shape=(None, self.parameters['token_maxlen'],),
                            dtype='int32', name='char_indices')
        inputs = self.char_level_token_encoder()(word_inputs)
        drop_inputs = SpatialDropout1D(self.parameters['dropout_rate'])(inputs)
        lstm_inputs = TimestepDropout(self.parameters['word_dropout_rate'])(drop_inputs)
        next_ids = Input(shape=(None, 1), name='next_ids', dtype='float32')
        previous_ids = Input(shape=(None, 1), name='previous_ids', dtype='float32')

    re_lstm_inputs = Lambda(function=ELMo_obj.reverse)(lstm_inputs)
    mask = Lambda(function=ELMo_obj.reverse)(drop_inputs)

    for i in range(self.parameters['n_lstm_layers']):
        if self.parameters['cuDNN']:
            lstm = CuDNNLSTM(units=self.parameters['lstm_units_size'], return_sequences=True,
                             kernel_constraint=MinMaxNorm(-1 * self.parameters['cell_clip'],
                                                          self.parameters['cell_clip']),
                             recurrent_constraint=MinMaxNorm(-1 * self.parameters['cell_clip'],
                                                             self.parameters['cell_clip']))(lstm_inputs)
        else:
            lstm = LSTM(units=self.parameters['lstm_units_size'], return_sequences=True,
                        activation="tanh", recurrent_activation='sigmoid',
                        kernel_constraint=MinMaxNorm(-1 * self.parameters['cell_clip'],
                                                     self.parameters['cell_clip']),
                        recurrent_constraint=MinMaxNorm(-1 * self.parameters['cell_clip'],
                                                        self.parameters['cell_clip']))(lstm_inputs)
        lstm = Camouflage(mask_value=0)(inputs=[lstm, drop_inputs])
        proj = TimeDistributed(
            Dense(self.parameters['hidden_units_size'], activation='linear',
                  kernel_constraint=MinMaxNorm(-1 * self.parameters['proj_clip'],
                                               self.parameters['proj_clip'])))(lstm)
        lstm_inputs = add([proj, lstm_inputs], name='f_block_{}'.format(i + 1))
        lstm_inputs = SpatialDropout1D(self.parameters['dropout_rate'])(lstm_inputs)

    for i in range(self.parameters['n_lstm_layers']):
        if self.parameters['cuDNN']:
            re_lstm = CuDNNLSTM(units=self.parameters['lstm_units_size'], return_sequences=True,
                                kernel_constraint=MinMaxNorm(-1 * self.parameters['cell_clip'],
                                                             self.parameters['cell_clip']),
                                recurrent_constraint=MinMaxNorm(-1 * self.parameters['cell_clip'],
                                                                self.parameters['cell_clip']))(re_lstm_inputs)
        else:
            re_lstm = LSTM(units=self.parameters['lstm_units_size'], return_sequences=True,
                           activation='tanh', recurrent_activation='sigmoid',
                           kernel_constraint=MinMaxNorm(-1 * self.parameters['cell_clip'],
                                                        self.parameters['cell_clip']),
                           recurrent_constraint=MinMaxNorm(-1 * self.parameters['cell_clip'],
                                                           self.parameters['cell_clip']))(re_lstm_inputs)
        re_lstm = Camouflage(mask_value=0)(inputs=[re_lstm, mask])
        re_proj = TimeDistributed(
            Dense(self.parameters['hidden_units_size'], activation='linear',
                  kernel_constraint=MinMaxNorm(-1 * self.parameters['proj_clip'],
                                               self.parameters['proj_clip'])))(re_lstm)
        re_lstm_inputs = add([re_proj, re_lstm_inputs], name='b_block_{}'.format(i + 1))
        re_lstm_inputs = SpatialDropout1D(self.parameters['dropout_rate'])(re_lstm_inputs)

    re_lstm_inputs = Lambda(function=ELMo_obj.reverse, name="reverse")(re_lstm_inputs)

    sampled_softmax = SampledSoftmax(
        num_classes=self.parameters['vocab_size'],
        num_sampled=int(self.parameters['num_sampled']),
        tied_to=embeddings if self.parameters['weight_tying']
        and self.parameters['token_encoding'] == 'word' else None)
    outputs = sampled_softmax([lstm_inputs, next_ids])
    re_outputs = sampled_softmax([re_lstm_inputs, previous_ids])

    self._model = Model(inputs=[word_inputs, next_ids, previous_ids],
                        outputs=[outputs, re_outputs])
    # self._model.compile(optimizer=Adagrad(lr=self.parameters['lr'],
    #                                       clipvalue=self.parameters['clip_value']), loss=None)
    if print_summary:
        self._model.summary()
    self.wrap_multi_elmo_encoder()
def window_lstm(step_back, ts_shape, lr=0.001):
    demand_predictions = []  # list that will contain all predictions
    demand_input = Input(shape=(step_back, 1))
    flatten_lstm_block_1 = lstm_block(demand_input)

    # adding time space and input
    time_space_input = Input(shape=(ts_shape,))
    dense_ts = Dense(64, name='dense_ts')(time_space_input)
    merge_ts_lstm = concatenate([flatten_lstm_block_1, dense_ts])
    dense_block_1 = dense_block(merge_ts_lstm)

    # generating d_t+1
    d_t_plus_1 = Dense(1, kernel_constraint=MinMaxNorm(min_value=0.0, max_value=1.0),
                       name='d_t_plus_1')(dense_block_1)
    demand_predictions.append(d_t_plus_1)

    # using d_t+1 prediction
    demand_input_2 = append_demand_input(demand_input, d_t_plus_1)
    flatten_lstm_block_2 = lstm_block(demand_input_2)
    merge_ts_lstm_2 = concatenate([flatten_lstm_block_2, dense_ts])
    dense_block_2 = dense_block(merge_ts_lstm_2)

    # generating d_t+2
    d_t_plus_2 = Dense(1, kernel_constraint=MinMaxNorm(min_value=0.0, max_value=1.0),
                       name='d_t_plus_2')(dense_block_2)
    demand_predictions.append(d_t_plus_2)

    # using d_t+2 prediction
    demand_input_3 = append_demand_input(demand_input_2, d_t_plus_2)
    flatten_lstm_block_3 = lstm_block(demand_input_3)
    merge_ts_lstm_3 = concatenate([flatten_lstm_block_3, dense_ts])
    dense_block_3 = dense_block(merge_ts_lstm_3)

    # generating d_t+3
    d_t_plus_3 = Dense(1, kernel_constraint=MinMaxNorm(min_value=0.0, max_value=1.0),
                       name='d_t_plus_3')(dense_block_3)
    demand_predictions.append(d_t_plus_3)

    # using d_t+3 prediction
    demand_input_4 = append_demand_input(demand_input_3, d_t_plus_3)
    flatten_lstm_block_4 = lstm_block(demand_input_4)
    merge_ts_lstm_4 = concatenate([flatten_lstm_block_4, dense_ts])
    dense_block_4 = dense_block(merge_ts_lstm_4)

    # generating d_t+4
    d_t_plus_4 = Dense(1, kernel_constraint=MinMaxNorm(min_value=0.0, max_value=1.0),
                       name='d_t_plus_4')(dense_block_4)
    demand_predictions.append(d_t_plus_4)

    # using d_t+4 prediction
    demand_input_5 = append_demand_input(demand_input_4, d_t_plus_4)
    flatten_lstm_block_5 = lstm_block(demand_input_5)
    merge_ts_lstm_5 = concatenate([flatten_lstm_block_5, dense_ts])
    dense_block_5 = dense_block(merge_ts_lstm_5)

    # generating d_t+5
    d_t_plus_5 = Dense(1, kernel_constraint=MinMaxNorm(min_value=0.0, max_value=1.0),
                       name='d_t_plus_5')(dense_block_5)
    demand_predictions.append(d_t_plus_5)

    model = Model(inputs=[demand_input, time_space_input], outputs=demand_predictions)
    adam = Adam(lr=lr)
    model.compile(optimizer=adam, loss=mean_squared_error, metrics=[rmse])
    return model
def build(self, input_shape):
    # initialize weight matrix for each capsule in lower layer
    self.beta = self.add_weight(shape=[1],
                                initializer=Ones(),
                                name='beta',
                                constraint=MinMaxNorm(-0.2, 2.0, 0.8))
    self.built = True
class ModelCollection():
    BEST_MODEL_1 = {
        'estimator': Classifier('BEST_MODEL_1'),
        'param_grid': {
            'units': [[200, 150, 100], [100, 100, 100], [200, 200, 200]],
            'input_dim': [1110],
            'output_dim': [49],
            'activations': ['relu', 'selu'],
            'regularizers': [None, [l2(l=1e-5), l2(l=1e-5), l2(l=1e-5)]],
            'bregularizers': [None, [l2(l=1e-4), l2(l=1e-4), l2(l=1e-4)]],
            'initializers': ['glorot_normal'],
            'constraints': [MinMaxNorm(0, 0.5)],
            'dropouts': [[0.3, 0.2], [0.4, 0.3]],
            'lr': [1e-5, 1e-6],
            'loss_func': ['categorical_crossentropy'],
            'batch_size': [256, 1024],
            'epochs': [5000]}}

    NEURAL_NETWORK_3LAYERS_COMPLETE = {
        'estimator': Classifier('3_LAYER_NN_A'),
        'param_grid': {
            'units': [[156, 156, 156], [156, 102, 49], [121, 96, 49], [148, 128, 49]],
            'input_dim': [156],
            'output_dim': [49],
            'activations': ['relu', 'tanh'],
            'regularizers': [None, [l2(l=0.0001), l2(l=0.0001), l2(l=0.0001)]],
            'initializers': ['glorot_normal', 'glorot_uniform'],
            'dropouts': [0.2, 0.3],
            'lr': [0.001, 0.0001],
            'loss_func': ['sparse_categorical_crossentropy', categorical_cubic_hinge, categorical_squared_hinge],
            'batch_size': [32, 64, 128],
            'epochs': [1000, 1500, 2000]}}

    NEURAL_NETWORK_2LAYERS_COMPLETE = {
        'estimator': Classifier('2_LAYER_NN_A'),
        'param_grid': {
            'units': [[156, 156], [156, 49], [106, 49]],
            'input_dim': [156],
            'output_dim': [49],
            'activations': ['relu', 'tanh'],
            'regularizers': [None, [l2(l=0.0001), l2(l=0.0001)]],
            'initializers': ['glorot_normal', 'glorot_uniform'],
            'dropouts': [0.2, 0.3],
            'lr': [0.001, 0.0001],
            'loss_func': ['sparse_categorical_crossentropy', categorical_cubic_hinge, categorical_squared_hinge],
            'batch_size': [32, 64, 128],
            'epochs': [1000, 1500, 2000]}}

    NEURAL_NETWORK_3LAYERS_COMPACT = {
        'estimator': Classifier('3_LAYER_NN_B'),
        'param_grid': {
            'units': [[156, 156, 156], [156, 102, 49], [148, 128, 49]],
            'input_dim': [156],
            'output_dim': [49],
            'activations': ['relu'],
            'regularizers': [None],
            'initializers': ['glorot_normal', 'glorot_uniform'],
            'dropouts': [0.2, 0.3],
            'lr': [0.0001],
            'loss_func': ['sparse_categorical_crossentropy'],
            'batch_size': [64, 128],
            'epochs': [1000]}}

    NEURAL_NETWORK_2LAYERS_COMPACT = {
        'estimator': Classifier('2_LAYER_NN_B'),
        'param_grid': {
            'units': [[156, 156], [156, 49], [106, 49]],
            'input_dim': [156],
            'output_dim': [49],
            'activations': ['relu'],
            'regularizers': [None],
            'initializers': ['glorot_normal', 'glorot_uniform'],
            'dropouts': [0.2, 0.3],
            'lr': [0.0001],
            'loss_func': ['sparse_categorical_crossentropy'],
            'batch_size': [64, 128],
            'epochs': [1000]}}

    NEURAL_NETWORK_MINIMALIST_1 = {
        'estimator': Classifier('1_MINIMALIST'),
        'param_grid': {
            'units': [[156, 156], [156, 49], [106, 49]],
            'input_dim': [156],
            'output_dim': [49],
            'activations': ['relu'],
            'regularizers': [None],
            'initializers': ['glorot_normal', 'glorot_uniform'],
            'dropouts': [0.2, 0.3],
            'lr': [0.0001],
            'loss_func': ['sparse_categorical_crossentropy'],
            'batch_size': [64, 128],
            'epochs': [1000]}}

    SVC_RBF = {
        'estimator': SVC(),
        'param_grid': {
            'C': [2**i for i in range(1, 8)],
            'gamma': [2**-i for i in range(4, 16)],
            'kernel': ['rbf']}}

    SVC_LINEAR = {
        'estimator': SVC(),
        'param_grid': {
            'C': [2**i for i in range(1, 8)],
            'gamma': [2**-i for i in range(4, 16)],
            'kernel': ['linear']}}

    SVC_SIG = {
        'estimator': SVC(),
        'param_grid': {
            'C': [2**i for i in range(0, 7)],
            'coef0': [np.linspace(-5, 5, 11)],
            'gamma': [2**-i for i in range(5, 15)],
            'kernel': ['sigmoid'],
            'class_weight': ['balanced', None]}}

    SVC_POLY = {
        'estimator': SVC(),
        'param_grid': {
            'C': [2**i for i in range(0, 7)],
            'coef0': [np.linspace(-5, 5, 11)],
            'gamma': [2**-i for i in range(5, 15)],
            'degree': [1, 2, 3, 4],
            'kernel': ['poly'],
            'class_weight': ['balanced', None]}}
    KNN = {
        'estimator': KNeighborsClassifier(),
        'param_grid': {
            'n_neighbors': [1, 3, 7, 15, 30, 60, 120, 250, 500],
            'weights': ['uniform', 'distance'],
            'p': [1, 2],
            'metric': ['minkowski']}}

    LOGISTIC_REGRESSION = {
        'estimator': LogisticRegression(),
        'param_grid': {
            'C': [i for i in range(1, 50, 2)],
            'class_weight': [None, 'balanced'],
            'solver': ['newton-cg', 'saga', 'lbfgs', 'sag'],
            'multi_class': ['ovr', 'multinomial'],
            'l1_ratio': [0, 0.2, 0.5, 0.8, 1]}}
def __call__(self, w):
    return MinMaxNorm(self.min_value, self.max_value, self.penalty)(w)
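# The wrapper above simply delegates to Keras's MinMaxNorm. As a reminder of what
# that constraint computes, here is a minimal, self-contained sketch using
# tf.keras; the weight values are made up for illustration.
import tensorflow as tf
from tensorflow.keras.constraints import MinMaxNorm

w = tf.constant([[3.0, 0.1],
                 [4.0, 0.1]])                  # column L2 norms: 5.0 and ~0.14
constraint = MinMaxNorm(min_value=0.5, max_value=1.0, rate=1.0, axis=0)
w_constrained = constraint(w)                  # each column rescaled so its norm lies in [0.5, 1.0]
print(tf.norm(w_constrained, axis=0).numpy())  # -> approximately [1.0, 0.5]
# With rate < 1.0 the constraint only moves each norm part of the way toward the
# clipped value: desired = rate * clip(norm, min, max) + (1 - rate) * norm.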
def compile_elmo(self, print_summary=False):
    """Compiles a Language Model RNN based on the given parameters."""
    # Either word embeddings or character embeddings can be fed into the encoder.
    if self.parameters['token_encoding'] == 'word':
        # Train word embeddings from scratch
        word_inputs = Input(shape=(None,), name='word_indices', dtype='int32')
        embeddings = Embedding(self.parameters['vocab_size'],
                               self.parameters['hidden_units_size'],
                               trainable=True, name='token_encoding')
        inputs = embeddings(word_inputs)  # Token embeddings for Input
        # SpatialDropout1D randomly zeroes out entire feature dimensions;
        # see https://blog.csdn.net/weixin_43896398/article/details/84762943
        drop_inputs = SpatialDropout1D(self.parameters['dropout_rate'])(inputs)
        lstm_inputs = TimestepDropout(self.parameters['word_dropout_rate'])(drop_inputs)
        # Pass outputs as inputs to apply sampled softmax
        next_ids = Input(shape=(None, 1), name='next_ids', dtype='float32')
        previous_ids = Input(shape=(None, 1), name='previous_ids', dtype='float32')
    elif self.parameters['token_encoding'] == 'char':
        # Train character-level representation
        word_inputs = Input(shape=(None, self.parameters['token_maxlen'],),
                            dtype='int32', name='char_indices')
        # Character-level embeddings after convolution; token embeddings for Input
        inputs = self.char_level_token_encoder()(word_inputs)
        # Apply dropout
        drop_inputs = SpatialDropout1D(self.parameters['dropout_rate'])(inputs)
        lstm_inputs = TimestepDropout(self.parameters['word_dropout_rate'])(drop_inputs)
        # Pass outputs as inputs to apply sampled softmax
        next_ids = Input(shape=(None, 1), name='next_ids', dtype='float32')
        previous_ids = Input(shape=(None, 1), name='previous_ids', dtype='float32')

    # Reversed input for backward LSTMs: flip the sequence (and the mask) so the
    # backward pass can be implemented with ordinary forward LSTMs.
    re_lstm_inputs = Lambda(function=ELMo.reverse)(lstm_inputs)
    mask = Lambda(function=ELMo.reverse)(drop_inputs)

    # Forward LSTMs
    for i in range(self.parameters['n_lstm_layers']):
        # The cuDNN variant is the GPU-accelerated LSTM kernel; otherwise fall back to the standard LSTM.
        if self.parameters['cuDNN']:
            lstm = CuDNNLSTM(units=self.parameters['lstm_units_size'], return_sequences=True,
                             kernel_constraint=MinMaxNorm(-1 * self.parameters['cell_clip'],
                                                          self.parameters['cell_clip']),
                             recurrent_constraint=MinMaxNorm(-1 * self.parameters['cell_clip'],
                                                             self.parameters['cell_clip']))(lstm_inputs)
        else:
            lstm = LSTM(units=self.parameters['lstm_units_size'], return_sequences=True,
                        activation="tanh", recurrent_activation='sigmoid',
                        kernel_constraint=MinMaxNorm(-1 * self.parameters['cell_clip'],
                                                     self.parameters['cell_clip']),
                        recurrent_constraint=MinMaxNorm(-1 * self.parameters['cell_clip'],
                                                        self.parameters['cell_clip']))(lstm_inputs)
        lstm = Camouflage(mask_value=0)(inputs=[lstm, drop_inputs])
        # Projection to hidden_units_size (a Dense layer applied to the LSTM output)
        proj = TimeDistributed(
            Dense(self.parameters['hidden_units_size'], activation='linear',
                  kernel_constraint=MinMaxNorm(-1 * self.parameters['proj_clip'],
                                               self.parameters['proj_clip'])))(lstm)
        # Merge Bi-LSTM feature vectors with the previous ones (add proj to lstm_inputs)
        lstm_inputs = add([proj, lstm_inputs], name='f_block_{}'.format(i + 1))
        # Apply variational drop-out between BI-LSTM layers before feeding the next layer
        lstm_inputs = SpatialDropout1D(self.parameters['dropout_rate'])(lstm_inputs)

    # Backward LSTMs
    for i in range(self.parameters['n_lstm_layers']):
        if self.parameters['cuDNN']:
            re_lstm = CuDNNLSTM(units=self.parameters['lstm_units_size'], return_sequences=True,
                                kernel_constraint=MinMaxNorm(-1 * self.parameters['cell_clip'],
                                                             self.parameters['cell_clip']),
                                recurrent_constraint=MinMaxNorm(-1 * self.parameters['cell_clip'],
                                                                self.parameters['cell_clip']))(re_lstm_inputs)
        else:
            re_lstm = LSTM(units=self.parameters['lstm_units_size'], return_sequences=True,
                           activation='tanh', recurrent_activation='sigmoid',
                           kernel_constraint=MinMaxNorm(-1 * self.parameters['cell_clip'],
                                                        self.parameters['cell_clip']),
                           recurrent_constraint=MinMaxNorm(-1 * self.parameters['cell_clip'],
                                                           self.parameters['cell_clip']))(re_lstm_inputs)
        re_lstm = Camouflage(mask_value=0)(inputs=[re_lstm, mask])
        # Projection to hidden_units_size
        re_proj = TimeDistributed(
            Dense(self.parameters['hidden_units_size'], activation='linear',
                  kernel_constraint=MinMaxNorm(-1 * self.parameters['proj_clip'],
                                               self.parameters['proj_clip'])))(re_lstm)
        # Merge Bi-LSTM feature vectors with the previous ones (add re_proj to re_lstm_inputs)
        re_lstm_inputs = add([re_proj, re_lstm_inputs], name='b_block_{}'.format(i + 1))
        # Apply variational drop-out between backward BI-LSTM layers
        re_lstm_inputs = SpatialDropout1D(self.parameters['dropout_rate'])(re_lstm_inputs)

    # Reverse backward LSTMs' outputs = make them forward-aligned again
    re_lstm_inputs = Lambda(function=ELMo.reverse, name="reverse")(re_lstm_inputs)

    # Project to Vocabulary with Sampled Softmax
    sampled_softmax = SampledSoftmax(
        num_classes=self.parameters['vocab_size'],
        num_sampled=int(self.parameters['num_sampled']),
        tied_to=embeddings if self.parameters['weight_tying'] else None)
    # The forward LSTM predicts the next token at each step; the backward LSTM
    # predicts the previous token.
    outputs = sampled_softmax([lstm_inputs, next_ids])
    re_outputs = sampled_softmax([re_lstm_inputs, previous_ids])

    # Forward and backward outputs
    self._model = Model(inputs=[word_inputs, next_ids, previous_ids],
                        outputs=[outputs, re_outputs])
    self._model.compile(optimizer=Adagrad(lr=self.parameters['lr'],
                                          clipvalue=self.parameters['clip_value']),
                        loss=None)
    if print_summary:
        self._model.summary()
def compile_elmo(self, print_summary=False):
    """
    Compiles a Language Model RNN based on the given parameters
    """
    if self.parameters['token_encoding'] == 'word':
        # Train word embeddings from scratch
        word_inputs = Input(shape=(None,), name='word_indices', dtype='int32')
        embeddings = Embedding(self.parameters['vocab_size'],
                               self.parameters['hidden_units_size'],
                               trainable=True, name='token_encoding')
        inputs = embeddings(word_inputs)  # Token embeddings for Input
        drop_inputs = SpatialDropout1D(self.parameters['dropout_rate'])(inputs)
        lstm_inputs = TimestepDropout(self.parameters['word_dropout_rate'])(drop_inputs)
        # Pass outputs as inputs to apply sampled softmax
        next_ids = Input(shape=(None, 1), name='next_ids', dtype='float32')
        previous_ids = Input(shape=(None, 1), name='previous_ids', dtype='float32')
    elif self.parameters['token_encoding'] == 'char':
        # Train character-level representation
        word_inputs = Input(shape=(None, self.parameters['token_maxlen'],),
                            dtype='int32', name='char_indices')
        inputs = self.char_level_token_encoder()(word_inputs)  # Token embeddings for Input
        drop_inputs = SpatialDropout1D(self.parameters['dropout_rate'])(inputs)
        lstm_inputs = TimestepDropout(self.parameters['word_dropout_rate'])(drop_inputs)
        # Pass outputs as inputs to apply sampled softmax
        next_ids = Input(shape=(None, 1), name='next_ids', dtype='float32')
        previous_ids = Input(shape=(None, 1), name='previous_ids', dtype='float32')

    # Reversed input for backward LSTMs
    re_lstm_inputs = Lambda(function=ELMo.reverse)(lstm_inputs)
    mask = Lambda(function=ELMo.reverse)(drop_inputs)

    # Forward LSTMs
    for i in range(self.parameters['n_lstm_layers']):
        if self.parameters['cuDNN']:
            lstm = CuDNNLSTM(units=self.parameters['lstm_units_size'], return_sequences=True,
                             kernel_constraint=MinMaxNorm(-1 * self.parameters['cell_clip'],
                                                          self.parameters['cell_clip']),
                             recurrent_constraint=MinMaxNorm(-1 * self.parameters['cell_clip'],
                                                             self.parameters['cell_clip']))(lstm_inputs)
        else:
            lstm = LSTM(units=self.parameters['lstm_units_size'], return_sequences=True,
                        activation="tanh", recurrent_activation='sigmoid',
                        kernel_constraint=MinMaxNorm(-1 * self.parameters['cell_clip'],
                                                     self.parameters['cell_clip']),
                        recurrent_constraint=MinMaxNorm(-1 * self.parameters['cell_clip'],
                                                        self.parameters['cell_clip']))(lstm_inputs)
        lstm = Camouflage(mask_value=0)(inputs=[lstm, drop_inputs])
        # Projection to hidden_units_size
        proj = TimeDistributed(
            Dense(self.parameters['hidden_units_size'], activation='linear',
                  kernel_constraint=MinMaxNorm(-1 * self.parameters['proj_clip'],
                                               self.parameters['proj_clip'])))(lstm)
        # Merge Bi-LSTMs feature vectors with the previous ones
        lstm_inputs = add([proj, lstm_inputs], name='f_block_{}'.format(i + 1))
        # Apply variational drop-out between BI-LSTM layers
        lstm_inputs = SpatialDropout1D(self.parameters['dropout_rate'])(lstm_inputs)

    # Backward LSTMs
    for i in range(self.parameters['n_lstm_layers']):
        if self.parameters['cuDNN']:
            re_lstm = CuDNNLSTM(units=self.parameters['lstm_units_size'], return_sequences=True,
                                kernel_constraint=MinMaxNorm(-1 * self.parameters['cell_clip'],
                                                             self.parameters['cell_clip']),
                                recurrent_constraint=MinMaxNorm(-1 * self.parameters['cell_clip'],
                                                                self.parameters['cell_clip']))(re_lstm_inputs)
        else:
            re_lstm = LSTM(units=self.parameters['lstm_units_size'], return_sequences=True,
                           activation='tanh', recurrent_activation='sigmoid',
                           kernel_constraint=MinMaxNorm(-1 * self.parameters['cell_clip'],
                                                        self.parameters['cell_clip']),
                           recurrent_constraint=MinMaxNorm(-1 * self.parameters['cell_clip'],
                                                           self.parameters['cell_clip']))(re_lstm_inputs)
        re_lstm = Camouflage(mask_value=0)(inputs=[re_lstm, mask])
        # Projection to hidden_units_size
        re_proj = TimeDistributed(
            Dense(self.parameters['hidden_units_size'], activation='linear',
                  kernel_constraint=MinMaxNorm(-1 * self.parameters['proj_clip'],
                                               self.parameters['proj_clip'])))(re_lstm)
        # Merge Bi-LSTMs feature vectors with the previous ones
        re_lstm_inputs = add([re_proj, re_lstm_inputs], name='b_block_{}'.format(i + 1))
        # Apply variational drop-out between BI-LSTM layers
        re_lstm_inputs = SpatialDropout1D(self.parameters['dropout_rate'])(re_lstm_inputs)

    # Reverse backward LSTMs' outputs = Make it forward again
    re_lstm_inputs = Lambda(function=ELMo.reverse, name="reverse")(re_lstm_inputs)

    # Project to Vocabulary with Sampled Softmax
    sampled_softmax = SampledSoftmax(
        num_classes=self.parameters['vocab_size'],
        num_sampled=int(self.parameters['num_sampled']),
        tied_to=embeddings if self.parameters['weight_tying']
        and self.parameters['token_encoding'] == 'word' else None)
    outputs = sampled_softmax([lstm_inputs, next_ids])
    re_outputs = sampled_softmax([re_lstm_inputs, previous_ids])

    self._model = Model(inputs=[word_inputs, next_ids, previous_ids],
                        outputs=[outputs, re_outputs])
    self._model.compile(optimizer=Adagrad(lr=self.parameters['lr'],
                                          clipvalue=self.parameters['clip_value']),
                        loss=None)
    if print_summary:
        self._model.summary()
def build(self, input_shape):
    # initialize weight matrix for each capsule in lower layer
    self.beta = self.add_weight(shape=list(input_shape)[1:],
                                name='beta',
                                initializer=Ones(),
                                constraint=MinMaxNorm(-0.1, 2.0, 0.8),
                                trainable=True)
    self.built = True
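# The build() methods above all follow the same pattern: create a weight with
# add_weight() and attach a MinMaxNorm constraint so the optimizer cannot push it
# out of range. Below is a minimal, runnable tf.keras sketch of that pattern; the
# layer name and bounds are illustrative, not taken from the snippets above.
import tensorflow as tf
from tensorflow.keras.constraints import MinMaxNorm
from tensorflow.keras.initializers import Constant


class BoundedScale(tf.keras.layers.Layer):
    """Multiplies its input by a learnable scalar kept in [0.1, 2.0]."""

    def build(self, input_shape):
        self.scale = self.add_weight(name="scale", shape=(1,),
                                     initializer=Constant(1.0),
                                     constraint=MinMaxNorm(min_value=0.1, max_value=2.0, rate=1.0),
                                     trainable=True)
        super().build(input_shape)

    def call(self, inputs):
        return inputs * self.scale


layer = BoundedScale()
print(layer(tf.ones((2, 3))))  # the constraint re-projects the weight after each optimizer update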
num_classes = 10
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# model = Sequential()
# # The input layer requires the special input_shape parameter which should match
# # the shape of our training data.
# # MinMaxNorm(min_value=0.0, max_value=1.0, rate=1.0, axis=0)
# model.add(Dense(units=4, activation='sigmoid', input_shape=(image_size,), use_bias=False,
#                 kernel_constraint=MinMaxNorm(min_value=-1.0, max_value=1.0, rate=1.0, axis=0)))
# model.add(Dense(units=num_classes, activation='softmax', use_bias=False,
#                 kernel_constraint=MinMaxNorm(min_value=-1.0, max_value=1.0, rate=1.0, axis=0)))
# model.summary()

inputs = Input(shape=(784,), name='img')
dense_1 = Dense(90, activation='relu', use_bias=False,
                kernel_constraint=MinMaxNorm(min_value=-1.0, max_value=1.0, rate=1.0, axis=0))
intermediate_output = dense_1(inputs)
dense_2 = Dense(60, activation='relu', use_bias=False,
                kernel_constraint=MinMaxNorm(min_value=-1.0, max_value=1.0, rate=1.0, axis=0))
intermediate_output = dense_2(intermediate_output)
dense = Dense(num_classes, activation="softmax", use_bias=False,
              kernel_constraint=MinMaxNorm(min_value=-1.0, max_value=1.0, rate=1.0, axis=0))
outputs = dense(intermediate_output)

intermediate_model = Model(inputs=inputs, outputs=intermediate_output)
model = Model(inputs=inputs, outputs=outputs, name='mnist_model')
model.summary()

logger = keras.callbacks.ProgbarLogger(count_mode='samples', stateful_metrics=None)
model.compile(optimizer="sgd", loss='categorical_crossentropy', metrics=['accuracy'])
history = model.fit(x_train, y_train, validation_split=0.1,
def _compile_hans(self, shape, n_hidden_layers, hidden_units_size, dropout_rate,
                  word_dropout_rate, lr):
    """
    Compiles a Hierarchical Attention Network based on the given parameters
    :param shape: The shape of the sequence, i.e. (number of sections, number of tokens)
    :param hidden_units_size: size of hidden units, as a list
    :param dropout_rate: The percentage of inputs to dropout
    :param word_dropout_rate: The percentage of timesteps to dropout
    :param lr: learning rate
    :return: Nothing
    """
    # Sentence Feature Representation
    section_inputs = Input(shape=(None,), name='document_inputs')
    self.pretrained_embeddings = self.PretrainedEmbedding()
    section_embs = self.pretrained_embeddings(section_inputs)

    # Apply variational dropout
    drop_section_embs = SpatialDropout1D(dropout_rate, name='feature_dropout')(section_embs)
    encodings = TimestepDropout(word_dropout_rate, name='word_dropout')(drop_section_embs)

    # Bi-GRUs over token embeddings
    for i in range(n_hidden_layers[0]):
        if self._cuDNN:
            grus = Bidirectional(
                CuDNNGRU(hidden_units_size[0], return_sequences=True,
                         kernel_constraint=MinMaxNorm(min_value=-2, max_value=2)),
                name='bidirectional_grus_{}'.format(i))(encodings)
        else:
            grus = Bidirectional(
                GRU(hidden_units_size[0], activation="tanh",
                    recurrent_activation='sigmoid', return_sequences=True,
                    kernel_constraint=MinMaxNorm(min_value=-2, max_value=2)),
                name='bidirectional_grus_{}'.format(i))(encodings)
        grus = Camouflage(mask_value=0.0)([grus, encodings])
        if i == 0:
            encodings = SpatialDropout1D(dropout_rate)(grus)
        else:
            encodings = add([grus, encodings])
            encodings = SpatialDropout1D(dropout_rate)(encodings)

    # Attention over BI-GRU (context-aware) embeddings
    if self._attention_mechanism == 'maxpooling':
        section_encoder = GlobalMaxPooling1D()(encodings)
    elif self._attention_mechanism == 'attention':
        encodings = SymmetricMasking()([encodings, encodings])
        section_encoder = ContextualAttention(kernel_regularizer=l2(),
                                              bias_regularizer=l2())(encodings)

    # Wrap up section_encoder
    section_encoder = Model(inputs=section_inputs,
                            outputs=section_encoder,
                            name='sentence_encoder')

    # Document Input Layer
    document_inputs = Input(shape=(shape[0], shape[1],), name='document_inputs')

    # Distribute sentences
    section_encodings = TimeDistributed(section_encoder,
                                        name='sentence_encodings')(document_inputs)

    # BI-GRUs over section embeddings
    for i in range(n_hidden_layers[1]):
        if self._cuDNN:
            grus = Bidirectional(
                CuDNNGRU(hidden_units_size[1], return_sequences=True,
                         kernel_constraint=MinMaxNorm(min_value=-2, max_value=2)),
                name='bidirectional_grus_upper_{}'.format(i))(section_encodings)
        else:
            grus = Bidirectional(
                GRU(hidden_units_size[1], activation="tanh",
                    recurrent_activation='sigmoid', return_sequences=True,
                    kernel_constraint=MinMaxNorm(min_value=-2, max_value=2)),
                name='bidirectional_grus_upper_{}'.format(i))(section_encodings)
        grus = Camouflage(mask_value=0.0)([grus, section_encodings])
        if i == 0:
            section_encodings = SpatialDropout1D(dropout_rate)(grus)
        else:
            section_encodings = add([grus, section_encodings])
            section_encodings = SpatialDropout1D(dropout_rate)(section_encodings)

    # Attention over BI-LSTM (context-aware) sentence embeddings
    if self._attention_mechanism == 'maxpooling':
        doc_encoding = GlobalMaxPooling1D(name='max_pooling')(section_encodings)
    elif self._attention_mechanism == 'attention':
        section_encodings = SymmetricMasking()([section_encodings, section_encodings])
        doc_encoding = ContextualAttention(kernel_regularizer=l2(),
                                           bias_regularizer=l2(),
                                           name='self_attention')(section_encodings)

    losses = 'binary_crossentropy' if self._decision_type == 'multi_label' else 'categorical_crossentropy'
    loss_weights = None

    # Final output (projection) layer
    outputs = Dense(self.n_classes,
                    activation='sigmoid' if self._decision_type == 'multi_label' else 'softmax',
                    name='outputs')(doc_encoding)

    # Wrap up model + Compile with optimizer and loss function
    self.model = Model(inputs=document_inputs, outputs=[outputs])
    self.model.compile(optimizer=Adam(lr=lr, clipvalue=2.0),
                       loss=losses,
                       loss_weights=loss_weights)