def __init__(self, num_filter, size_filter, sampling_stride,
             use_regular_uppsampling=False, size=2, rate=0.1,
             l1=0.01, l2=0.01, use_max_pool=False):
    super(UpsampleMod_s_res, self).__init__()
    self.use_max_pool = use_max_pool
    self.bn1 = LayerNormalization()
    self.bn2 = LayerNormalization()
    self.bn3 = LayerNormalization()
    self.bn4 = LayerNormalization()
    self.reg1 = L1L2(l1=l1, l2=l2)
    self.reg2 = L1L2(l1=l1, l2=l2)
    self.reg3 = L1L2(l1=l1, l2=l2)
    self.reg4 = L1L2(l1=l1, l2=l2)
    self.act = LeakyReLU()
    self.add = Add()
    self.concat = Concatenate(axis=2)
    self.conv1 = Conv1D(num_filter, size_filter, padding='same',
                        kernel_regularizer=self.reg1, use_bias=False)
    self.conv2 = Conv1D(num_filter, size_filter, padding='same',
                        kernel_regularizer=self.reg2, use_bias=False)
    self.conv3 = Conv1D(num_filter, size_filter, padding='same',
                        kernel_regularizer=self.reg3, use_bias=False)
    if not self.use_max_pool:
        self.u_sample = Conv1DTranspose(num_filter, size_filter,
                                        strides=sampling_stride,
                                        kernel_regularizer=self.reg4,
                                        use_bias=False)
    else:
        self.u_sample = UpSampling1D(size=size)
    self.dOut = Dropout(rate)
def first_layers(name, l1, l2, patch, activation):
    projection = Dense(64, activation=None, use_bias=False,
                       kernel_regularizer=L1L2(l1=l1, l2=l2),
                       name=name + "_proj")
    input_conv = Conv1D(64, 9, padding="same", activation=LeakyReLU(0.2),
                        kernel_regularizer=L1L2(l1=l1, l2=l2),
                        name=name + "_input_conv")
    if patch:
        out = Conv1D(1, 9, padding="same", activation=activation,
                     kernel_regularizer=L1L2(l1=l1, l2=l2),
                     name=name + "_conv_out")
    else:
        out = Dense(1, activation=activation, use_bias=False,
                    kernel_regularizer=L1L2(l1=l1, l2=l2),
                    name=name + "_dense_out")
    return projection, input_conv, out
def create_model(args, learning_rate, l1):
    hidden_layers = [int(n) for n in args.hidden_layers.split(',')]

    inputs = Input(shape=[N_FEATURES])
    hidden = inputs
    if hidden_layers != [-1]:
        for size in hidden_layers:
            hidden = Dense(size, kernel_regularizer=L1L2(l1=l1),
                           bias_regularizer=L1L2(l1=l1))(hidden)
            hidden = BatchNormalization()(hidden)
            hidden = ReLU()(hidden)
    outputs = Dense(1)(hidden)
    model = Model(inputs=inputs, outputs=outputs)

    # I know this is ugly, but I added the sgd arg only later so older networks
    # do not have args.optimizer (and were optimized with Adam)
    try:
        if args.optimizer == "sgd":
            optimizer = SGD(learning_rate=learning_rate, momentum=0.99,
                            nesterov=True)
        elif args.optimizer == "adam":
            optimizer = Adam(learning_rate=learning_rate)
    except AttributeError:
        optimizer = Adam(learning_rate=learning_rate)

    model.compile(
        optimizer=optimizer,
        loss='mse',
        metrics=[RootMeanSquaredError(), MeanAbsoluteError(), RSquare()])
    return model
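# A minimal usage sketch for create_model (not part of the original module):
# it only needs an argparse-style object with hidden_layers and optimizer
# attributes; the values below are hypothetical.
from argparse import Namespace

example_args = Namespace(hidden_layers="128,64", optimizer="adam")
example_model = create_model(example_args, learning_rate=1e-3, l1=1e-4)
example_model.summary()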
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.gcn0 = GCN(16, activation=tf.nn.relu,
                    kernel_regularizer=L1L2(l2=l2_coef))
    self.gcn1 = GCN(num_classes, kernel_regularizer=L1L2(l2=l2_coef))
    self.dropout = tf.keras.layers.Dropout(drop_rate)
def __init__(self, num_filter, size_filter, sampling_stride, use_max_pool=False,
             pool_size=2, rate=0.1, l1=0.01, l2=0.01, sample=True):
    super(DownSampleMod_res, self).__init__()
    self.use_max_pool = use_max_pool
    self.bn1 = LayerNormalization()
    self.bn2 = LayerNormalization()
    self.bn3 = LayerNormalization()
    self.bn4 = LayerNormalization()
    self.reg1 = L1L2(l1=l1, l2=l2)
    self.reg2 = L1L2(l1=l1, l2=l2)
    self.reg3 = L1L2(l1=l1, l2=l2)
    self.reg4 = L1L2(l1=l1, l2=l2)
    self.add = Add()
    self.act = LeakyReLU()
    self.conv1 = Conv1D(num_filter, size_filter, padding='same',
                        kernel_regularizer=self.reg1, use_bias=False)
    self.conv2 = Conv1D(num_filter, size_filter, padding='same',
                        kernel_regularizer=self.reg2, use_bias=False)
    self.conv3 = Conv1D(num_filter, size_filter, padding='same',
                        kernel_regularizer=self.reg3, use_bias=False)
    if not self.use_max_pool:
        self.d_sample = Conv1D(num_filter, size_filter, padding='same',
                               strides=sampling_stride,
                               kernel_regularizer=self.reg4, use_bias=False)
    else:
        self.d_sample = MaxPool1D(pool_size=pool_size, strides=sampling_stride)
    self.dOut = Dropout(rate)
    self.sample = sample
def quantile_head(inputs, timesteps, target_features, reg=(0.1, 0.1)):
    if timesteps == 1:
        x = Dense(target_features, kernel_regularizer=L1L2(*reg))(inputs)
        return x
    else:
        x = TimeDistributed(
            Dense(target_features, kernel_regularizer=L1L2(*reg)))(inputs)
        return x
def _init_model_32(self, inputs):
    # 3 to 2 network
    x = inputs
    x = CuDNNLSTM(96, kernel_regularizer=L1L2(l1=0.01, l2=0.01),
                  recurrent_regularizer=L1L2(l1=0.01, l2=0.01),
                  return_sequences=True)(x)
    x = CuDNNLSTM(32, kernel_regularizer=L1L2(l1=0.01, l2=0.01),
                  recurrent_regularizer=L1L2(l1=0.01, l2=0.01),
                  return_sequences=False)(x)
    x = Dense(2, activation='softmax')(x)
    return x
def __init__(self, filters, size, strides=1, dilation=1, constrains=None,
             l1=0.0, l2=0.0, rate=0.2):
    super(ResModPreActSN, self).__init__()
    self.filters = filters
    self.kernel = size
    self.strides = strides
    self.dilation = dilation
    self.constrains = constrains
    self.l1 = l1
    self.l2 = l2
    self.rate = rate
    self.conv1 = SpectralNormalization(
        Conv1D(self.filters, self.kernel, dilation_rate=self.dilation,
               padding='same', use_bias=True,
               kernel_regularizer=L1L2(l1=self.l1, l2=self.l2)))
    self.conv2 = SpectralNormalization(
        Conv1D(self.filters, self.kernel, dilation_rate=self.dilation,
               padding='same', use_bias=True,
               kernel_regularizer=L1L2(l1=self.l1, l2=self.l2)))
    self.conv = SpectralNormalization(
        Conv1D(self.filters, 1, padding='same', use_bias=False,
               kernel_regularizer=L1L2(l1=self.l1, l2=self.l2)))
    if self.strides > 1:
        self.conv3 = SpectralNormalization(
            Conv1D(self.filters, self.kernel, dilation_rate=1,
                   strides=self.strides, padding='same', use_bias=True,
                   kernel_regularizer=L1L2(l1=self.l1, l2=self.l2)))
    self.add = Add()
    self.dout = Dropout(self.rate)
    self.act = LeakyReLU(0.2)
def standard_LSTM(X_train, y_train, quantiles=False, loss='mse',
                  optimizer='adam', units=64, layers=1, dropout=0,
                  reg=(0.1, 0.1)):
    timesteps = y_train.shape[1]
    target_features = y_train.shape[2]

    if timesteps == 1:
        return_seq = False
    else:
        return_seq = True

    inputs = Input(shape=X_train.shape[1:])
    x = inputs
    for i in range(layers):
        x = LSTM(units, input_shape=X_train.shape[1:],
                 return_sequences=return_seq, activation='relu',
                 recurrent_regularizer=L1L2(0, 0),
                 kernel_regularizer=L1L2(0, 0))(x)
        if dropout:
            x = Dropout(dropout)(x)

    if not quantiles:
        x = quantile_head(x, timesteps, target_features, reg)
        model = Model(inputs=inputs, outputs=x)
    else:
        qheads = []
        for q in range(quantiles):
            qheads.append(quantile_head(x, timesteps, target_features, reg))
        model = Model(inputs=inputs, outputs=qheads)

    if quantiles:
        loss = [
            lambda pred, true, q=q: pinball_loss(q, pred, true)
            for q in np.linspace(0.1, 1, quantiles, endpoint=False)
        ]

    model.compile(loss=loss, optimizer=optimizer)
    return model
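# A minimal usage sketch for standard_LSTM (not from the original file); the
# shapes below are hypothetical: 100 samples, 24 input steps, 8 features,
# a single-step single-feature target. The quantile path additionally needs
# pinball_loss from this module.
import numpy as np

X_demo = np.random.rand(100, 24, 8).astype("float32")
y_demo = np.random.rand(100, 1, 1).astype("float32")
demo_model = standard_LSTM(X_demo, y_demo, units=32, layers=1, dropout=0.1)
demo_model.summary()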
def make_conv(input_layer, features, stride=1, decay=1e-2, transpose=False,
              dilation=1):
    layer = Conv2DTranspose if transpose else SeparableConv2D
    extra_params = {
        'kernel_initializer': 'orthogonal',
        'kernel_regularizer': L1L2(decay),
    } if transpose else {
        'pointwise_initializer': 'orthogonal',
        'depthwise_initializer': 'orthogonal',
        'depthwise_regularizer': L1L2(decay, decay),
        'pointwise_regularizer': L1L2(decay, decay),
    }
    # Keras conv layers only accept 'same' or 'valid' for padding; 'same' is
    # assumed here so the kernel_size = 1 + 2 * stride choice keeps the
    # spatial size at input_size / stride.
    return layer(filters=features, kernel_size=1 + 2 * stride, strides=stride,
                 padding='same', dilation_rate=dilation,
                 **extra_params)(input_layer)
def createSparseAE(encoding_dim=16, window=10):
    # encoding_dim = 16
    lambda_l1 = 0.000001

    # Encoder
    input_img = Input(shape=(window, ))
    flat_img = Flatten()(input_img)
    x = Dense(window - 2)(flat_img)  # , activation='relu'
    # x = Dense(encoding_dim * 2, activation='relu')(x)
    encoded = Dense(encoding_dim, activation='linear',
                    activity_regularizer=L1L2(lambda_l1))(x)

    # Decoder
    input_encoded = Input(shape=(encoding_dim, ))
    x = Dense(encoding_dim - 2)(input_encoded)  # , activation='relu'
    # x = Dense(encoding_dim * 3, activation='relu')(x)
    flat_decoded = Dense(window)(x)  # , activation='sigmoid'
    decoded = Reshape((window, ))(flat_decoded)

    # Models
    encoder = Model(input_img, encoded, name="encoder")
    decoder = Model(input_encoded, decoded, name="decoder")
    autoencoder = Model(input_img, decoder(encoder(input_img)),
                        name="autoencoder")
    return encoder, decoder, autoencoder
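# A minimal usage sketch for createSparseAE (the optimizer, loss, and dummy
# data below are assumptions; the original file does not show the training
# loop).
import numpy as np

encoder, decoder, autoencoder = createSparseAE(encoding_dim=16, window=10)
autoencoder.compile(optimizer='adam', loss='mse')
X_windows = np.random.rand(256, 10).astype("float32")  # dummy windows
autoencoder.fit(X_windows, X_windows, epochs=1, batch_size=32, verbose=0)
codes = encoder.predict(X_windows, verbose=0)  # shape (256, 16)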
def annotator(i):
    print(all_names[i])
    m = Sequential()
    m.add(Dense(1, activation='sigmoid',
                kernel_regularizer=L1L2(l1=0.0, l2=0.1),
                input_dim=100))
    m.compile(optimizer='sgd', loss='binary_crossentropy', metrics=['accuracy'])
    if vars(args)["train_annotators"]:
        filepath = "ano" + str(i) + ".best.hdf5"
        checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1,
                                     save_best_only=True, mode='min')
        callbacks_list = [checkpoint]
        m.fit(X_train, y_train[:, i], epochs=50,
              validation_data=(X_test, y_test[:, i]),
              callbacks=callbacks_list)
    m.load_weights("models/" + "ano" + str(i) + ".best.hdf5")
    return m
def loss(self, y_pred, y_true):
    losses = self.type.keys()
    loss_type = self.meta['type'].strip('[]')
    out_size = self.meta['out_size']
    H, W, _ = self.meta['inp_size']
    HW = H * W
    try:
        assert loss_type in losses, f'Loss type {loss_type} not implemented'
    except AssertionError as e:
        self.flags.error = str(e)
        self.logger.error(str(e))
        SharedFlagIO.send_flags(self)
        raise
    if self.first:
        self.logger.info('{} loss hyper-parameters:'.format(self.meta['model']))
        self.logger.info('Input Grid Size = {}'.format(HW))
        self.logger.info('Number of Outputs = {}'.format(out_size))
        self.first = False

    diff = y_true - y_pred
    if loss_type in ['sse', 'l2']:
        return tf.nn.l2_loss(diff)
    elif loss_type == 'mse':
        return tf.keras.losses.MSE(y_true, y_pred)
    elif loss_type == 'smooth':
        # smooth L1 (assumed reconstruction): L2 penalty on small residuals,
        # L1 penalty on large ones
        small = tf.cast(tf.abs(diff) < 1, tf.float32)
        large = 1. - small
        return tf.nn.l2_loss(tf.multiply(diff, small)) + \
            tf.reduce_sum(tf.abs(tf.multiply(diff, large)))
    elif loss_type in ['sparse', 'l1']:
        return l1(diff)
    elif loss_type == 'softmax':
        _loss = tf.nn.softmax_cross_entropy_with_logits(labels=y_true,
                                                        logits=y_pred)
        return tf.reduce_mean(_loss)
def kernel_regularizer(self):
    """Return l2 loss using 0.5*reg_lambda as the l2 term (as desired).

    L2 regularization is required for this loss function to be strongly
    convex.

    Returns:
      The L2 regularizer layer for this loss function, with the regularizer
      constant set to 0.5 * reg_lambda.
    """
    return L1L2(l2=self.reg_lambda / 2)
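# A small standalone check (a sketch, not part of the original module) that
# L1L2(l2=reg_lambda / 2) applied to a weight tensor gives exactly
# 0.5 * reg_lambda * sum(w**2), i.e. the strongly convex L2 term described
# above. reg_lambda and w are hypothetical values.
import tensorflow as tf
from tensorflow.keras.regularizers import L1L2

reg_lambda = 0.1
w = tf.constant([1.0, -2.0, 3.0])
penalty = L1L2(l2=reg_lambda / 2)(w)
expected = 0.5 * reg_lambda * tf.reduce_sum(tf.square(w))
assert abs(float(penalty) - float(expected)) < 1e-6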
def fit_model(self, lstm_layer_size=300, dropout=0.2, epochs=400, l1_val=0.01,
              l2_val=0.01, early_stop_score=0.001):
    print('Fitting model...')
    print('Params:')
    print('L1_val: ', l1_val)
    print('L2_val: ', l2_val)
    print('Dropout: ', dropout)
    print('Epochs: ', epochs)
    print('Lstm layer size :', lstm_layer_size)
    print('Early stop score:', early_stop_score)

    # do the actual lil lstm bit
    self.L1_val = l1_val
    self.L2_val = l2_val
    self.num_epochs = epochs
    self.dropout = dropout
    self.lstm_layer_size = lstm_layer_size
    self.early_stop_score = early_stop_score

    # fit network
    # regularizers = [L1L2(l1=0.0, l2=0.0), L1L2(l1=0.01, l2=0.0),
    #                 L1L2(l1=0.0, l2=0.01), L1L2(l1=0.01, l2=0.01)]
    # es = EarlyStopping(monitor='val_loss', mode='min', verbose=1)
    # es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, baseline=0.8)
    es = EarlyStoppingByLossVal(monitor='val_loss', value=self.early_stop_score,
                                verbose=1)
    regularizers = L1L2(self.L1_val, self.L2_val)

    # this initialisation can go in __init__
    self.model = Sequential()
    self.model.add(
        LSTM(self.lstm_layer_size,
             input_shape=(self.train_X.shape[1], self.train_X.shape[2]),
             bias_regularizer=regularizers))
    # model.add(Flatten())
    self.model.add(Dropout(self.dropout))
    # model.add(Dense(50))
    # model.add(Dropout(0.2))
    self.model.add(Dense(1))
    self.model.compile(loss='mae', optimizer='adam')
    self.history = self.model.fit(self.train_X, self.train_y, self.batch_size,
                                  self.num_epochs,
                                  validation_data=(self.test_X, self.test_y),
                                  verbose=2, shuffle=True, callbacks=[es])
def _build_train_op(self):
    """add the L1/L2 regularizations to controller loss"""
    if 'regularizer' in self.data_description_config:
        l1 = self.data_description_config["regularizer"].pop('l1', 0)
        l2 = self.data_description_config["regularizer"].pop('l2', 0)
        l1l2_reg = L1L2(l1=l1, l2=l2)
        dd_reg = tf.reduce_sum([l1l2_reg(x) for x in self.w_dd])
        self.loss += dd_reg
    super()._build_train_op()
def sigmoid_crossentropy():
    inputs = Input(shape=(784,))
    x = Dense(
        30,
        activation=sigmoid,
        kernel_initializer=RandomNormal(),
        bias_initializer=Zeros(),
        kernel_regularizer=L1L2(l2=5e-5),
    )(inputs)
    x = Dropout(0.5)(x)
    outputs = Dense(
        10,
        activation=sigmoid,
        kernel_initializer=RandomNormal(),
        bias_initializer=Zeros(),
        kernel_regularizer=L1L2(l2=5e-5),
    )(x)
    model = Model(inputs=inputs, outputs=outputs)
    cfg = {"lr": 0.5, "loss": binary_crossentropy}
    return model, cfg
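# A minimal sketch (not from the original module) of wiring the returned
# (model, cfg) pair together; plain SGD is an assumed optimizer choice here.
from tensorflow.keras.optimizers import SGD

model, cfg = sigmoid_crossentropy()
model.compile(optimizer=SGD(learning_rate=cfg["lr"]), loss=cfg["loss"],
              metrics=["accuracy"])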
def build_model(hp):
    Input_layer = Input(shape=[2, ], name='Input')
    # Lambda layer to separately use it as only L/D ratio input for a fork in the network.
    MaxVal_input = Lambda(get_LD, name='MaxVal_input',
                          output_shape=(1,))(Input_layer)

    # Defining the MaxVal fork of SN
    MaxVal_layer = []
    # hl = hp.Int('hl', min_value=2, max_value=15, step=1)
    hl = 14
    # MaxVal_units = hp.Int('MaxVal_units', min_value=1, max_value=10, step=2)
    MaxVal_units = 7
    MaxVal_L1 = hp.Choice('MaxVal_L1', [1.0, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5])
    MaxVal_L2 = hp.Choice('MaxVal_L2', [1.0, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5])
    for i in range(hl):
        if i == 0:
            # First layer in MaxVal fork
            MaxVal_layer.append(
                Dense(MaxVal_units, name='MaxVal_layer%d' % (i + 1),
                      kernel_regularizer=L1L2(l1=MaxVal_L1, l2=MaxVal_L2),
                      activation='relu')(MaxVal_input))
        elif i == hl - 1:
            # Last layer in MaxVal fork. *Activation must be linear*
            MaxVal_layer.append(
                Dense(1, kernel_regularizer=L1L2(l1=MaxVal_L1, l2=MaxVal_L2),
                      name='MaxVal_Final_layer',
                      activation='linear')(MaxVal_layer[i - 1]))
        else:
            # For intermediate layers
            MaxVal_layer.append(
                Dense(MaxVal_units, name='MaxVal_layer%d' % (i + 1),
                      kernel_regularizer=L1L2(l1=MaxVal_L1, l2=MaxVal_L2),
                      activation='relu')(MaxVal_layer[i - 1]))

    # Defining the IndVal fork of the Neural Network
    IndVal_L1 = hp.Choice('IndVal_L1', [1.0, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5])
    IndVal_L2 = hp.Choice('IndVal_L2', [1.0, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5])
    IndVal_layer = []
    # IndVal_units = hp.Int('IndVal_units', min_value=1, max_value=37, step=3)
    IndVal_units = 22
    for i in range(hl):
        if i == 0:
            # First layer in IndVal fork
            IndVal_layer.append(
                Dense(IndVal_units, name='IndVal_layer%d' % (i + 1),
                      kernel_regularizer=L1L2(l1=IndVal_L1, l2=IndVal_L2),
                      activation='relu')(Input_layer))
        elif i == hl - 1:
            # Last layer in IndVal fork. *Activation must be linear*
            IndVal_layer.append(
                Dense(1, kernel_regularizer=L1L2(l1=IndVal_L1, l2=IndVal_L2),
                      name='IndVal_Final_layer',
                      activation='linear')(IndVal_layer[i - 1]))
        else:
            # For intermediate layers
            IndVal_layer.append(
                Dense(IndVal_units, name='IndVal_layer%d' % (i + 1),
                      kernel_regularizer=L1L2(l1=IndVal_L1, l2=IndVal_L2),
                      activation='relu')(IndVal_layer[i - 1]))

    # Building the model with all connections
    model = Model(inputs=[Input_layer],
                  outputs=[IndVal_layer[hl - 1], MaxVal_layer[hl - 1]])

    # Defining Optimizer
    Adam = tf.keras.optimizers.Adam(
        learning_rate=0.0005,
        # learning_rate=hp.Choice('Adam_lr', [1e-0, 1e-1, 1e-2, 5e-3, 1e-3, 8e-4, 5e-4, 3e-4, 1e-4, 1e-5]),
        name="Adam")

    # Compiling the model
    model.compile(loss='mse', optimizer=Adam, metrics=['mape'])
    return model
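# A minimal sketch (assumed, not from the original script) of driving
# build_model with KerasTuner; the tuner settings and the training arrays
# x_train / y_ind / y_max are hypothetical placeholders.
import keras_tuner as kt

tuner = kt.RandomSearch(build_model, objective='val_loss', max_trials=10,
                        overwrite=True, directory='tuning',
                        project_name='maxval_indval')
# tuner.search(x_train, [y_ind, y_max], validation_split=0.2, epochs=50)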
def res_u_net_tor(p):
    reg = L1L2(l1=p['l1'][-1], l2=p['l2'][-1])
    inp = Input(shape=(p['max_seq_len'], p['features']), name='inp2')
    x = UNetModule_res(p)(inp)
    o = Conv1D(p['num_classes'], p['kernel_size'][-1],
               activation=p['output_activation'], padding='same',
               kernel_regularizer=reg, name='out2')(x)
    out_ang = Angularization(size_alphabet=p['num_classes'])(o)
    model = Model(inputs=inp, outputs=out_ang)
    return model
def ins_u_net(p):
    reg = L1L2(l1=p['l1'][-1], l2=p['l2'][-1])
    inp = Input(shape=(p['max_seq_len'],))
    x = Embedding(VOCAB_SIZE, p['emb_size'])(inp)
    x = BatchNormalization()(x)
    x = UNetModule_ins(p)(x)
    out = Conv1D(p['num_classes'], p['kernel_size'][-1],
                 activation=p['output_activation'], padding='same',
                 kernel_regularizer=reg)(x)
    model = Model(inputs=inp, outputs=out)
    return model
def _conv_block(inputs):
    # don't use a bias if batch normalization follows the convolution
    use_bias = not batch_norm
    x = Conv2D(n_filters, filter_size, use_bias=use_bias,
               kernel_regularizer=L1L2(l1_reg, l2_reg))(inputs)
    x = Activation(activation)(x)
    if batch_norm:
        x = BatchNormalization()(x)
    elif dropout > 0:
        x = Dropout(rate=dropout)(x)
    return MaxPool2D()(x)
def __init__(self, p, typ='original', emb=True):
    super(U_net, self).__init__()
    assert typ in ['original', 'res', 'ins'], \
        r"typ needs to be one of 'original', 'res' or 'ins'"
    self.h_parameters = p
    reg = L1L2(l1=p['l1'][-1], l2=p['l2'][-1])
    if typ == 'original':
        self.u_net_module = UNetModule(self.h_parameters)
    elif typ == 'res':
        self.u_net_module = UNetModule_res(self.h_parameters)
    elif typ == 'ins':
        self.u_net_module = UNetModule_ins(self.h_parameters)
    if emb:
        self.embedding = Embedding(VOCAB_SIZE, self.h_parameters['emb_size'])
        self.bn_embedding = LayerNormalization()
    self.conv_out = Conv1D(VOCAB_SIZE, self.h_parameters['kernel_size'][-1],
                           activation=self.h_parameters['output_activation'],
                           padding='same', kernel_regularizer=reg, name='out1')
    self.emb = emb
def model(hidden_dim=512, input_dim=28 * 28, sigma_regularization=1e-3,
          mu_regularization=1e-5, k=10,
          activation=lambda x: K.relu(x, 1.0 / 5.5)):
    """Create two layer MLP with softmax output"""
    _x = Input(shape=(input_dim,))
    layer = lambda output_dim, activation: BayesianDense(
        output_dim,
        activation=activation,
        W_sigma_regularizer=VariationalRegularizer(weight=sigma_regularization),
        b_sigma_regularizer=VariationalRegularizer(weight=sigma_regularization),
        W_regularizer=L1L2(l1=mu_regularization))
    h1 = layer(hidden_dim, activation)
    h2 = layer(hidden_dim, activation)
    y = layer(k, 'softmax')
    _y = y(h2(h1(_x)))
    m = Model(_x, _y)
    m.compile(Adam(1e-3), loss='categorical_crossentropy')
    return m
def _custom_init(self):
    # Build the Keras API dictionary of parameters
    conv = {
        "filters": self._filters,
        "kernel_size": self._kernel_size,
        "activation": self._activation,
        "padding": self._padding,
        "kernel_regularizer": L1L2(l1=self._l1_reg, l2=self._l2_reg),
    }
    # Add additional Keras parameters if passed
    if self._from_keras is not None:
        conv = {**conv, **self._from_keras}
    # Build the SegmentationModel API dictionary of parameters
    options = {
        "pool": self._pool,
        "dropout": self._dropout,
        "batch_norm": self._batch_norm,
        "up_sample": self._up_sample,
    }
    self._seg_model = custom_unet(self._input_size, conv, **options)
def _prep_predictor(self, name="predictor"):
    from tensorflow.keras.regularizers import L1, L2, L1L2
    from tensorflow.keras.layers import (
        Dropout,
        Dense,
        Activation,
    )

    layers = []
    if self.use_predictor and (self.predictor_dropout_rate > 0.0):
        layers.append(
            Dropout(self.predictor_dropout_rate,
                    name=f"{self.name}/{name}/dropout"))

    if (self.predictor_l1_rate > 0.0) and (self.predictor_l2_rate > 0.0):
        reg = L1L2(self.predictor_l1_rate, self.predictor_l2_rate)
    elif self.predictor_l1_rate > 0.0:
        reg = L1(self.predictor_l1_rate)
    elif self.predictor_l2_rate > 0.0:
        reg = L2(self.predictor_l2_rate)
    else:
        reg = None

    if self.use_predictor:
        layers.append(
            Dense(self.predictor_nunits,
                  activation="linear",
                  kernel_regularizer=reg,
                  use_bias=self.predictor_use_bias,
                  name=f"{self.name}/{name}/dense"))

        if self.predictor_activation != "linear":
            layers.append(
                Activation(self.predictor_activation,
                           name=f"{self.name}/{name}/activation"))
    return layers
def _prep_conv(self, name="conv"):
    from tensorflow.keras.regularizers import L1, L2, L1L2
    from tensorflow.keras.layers import MaxPooling1D
    from selectml.tf.layers import (
        AddChannel,
        Flatten1D,
        ConvLinkage,
    )

    layers = []
    if self.conv_nlayers > 0:
        if (self.conv_l1_rate > 0.0) and (self.conv_l2_rate > 0.0):
            reg = L1L2(self.conv_l1_rate, self.conv_l2_rate)
        elif self.conv_l1_rate > 0.0:
            reg = L1(self.conv_l1_rate)
        elif self.conv_l2_rate > 0.0:
            reg = L2(self.conv_l2_rate)
        else:
            reg = None

        layers.extend([
            AddChannel(name=f"{self.name}/{name}/addchannel"),
            ConvLinkage(nlayers=self.conv_nlayers,
                        filters=self.conv_filters,
                        strides=self.conv_strides,
                        kernel_size=self.conv_kernel_size,
                        activation=self.conv_activation,
                        activation_first=self.conv_activation,
                        activation_last=self.conv_activation,
                        kernel_regularizer=reg,
                        use_bn=self.conv_use_batchnorm,
                        name=f"{self.name}/{name}/convlinkage"),
            MaxPooling1D(pool_size=1, name=f"{self.name}/{name}/maxpool"),
            Flatten1D(name=f"{self.name}/{name}/flatten1d"),
        ])
    return layers
def __init__(self, filters=64, activity_regularizer=None):
    # for sparse autoencoding
    if activity_regularizer is not None:
        activity_regularizer = L1L2(l1=activity_regularizer)
    super(XceptionEncoderBlock, self).__init__()
    self.residual = Sequential([
        Conv2D(filters, (1, 1), strides=(2, 2), padding='same',
               use_bias=False),
        BatchNormalization()
    ])
    self.block = Sequential([
        SeparableConv2D(filters, (3, 3), padding='same', use_bias=False),
        BatchNormalization(),
        Activation('relu'),
        SeparableConv2D(filters, (3, 3), padding='same', use_bias=False,
                        activity_regularizer=activity_regularizer),
        BatchNormalization(),
        MaxPool2D((3, 3), strides=(2, 2), padding='same')
    ])
def deepmoji_architecture(nb_classes, nb_tokens, maxlen, feature_output=False,
                          embed_dropout_rate=0, final_dropout_rate=0,
                          embed_l2=1E-6, return_attention=False):
    """
    Returns the DeepMoji architecture uninitialized and
    without using the pretrained model weights.

    # Arguments:
        nb_classes: Number of classes in the dataset.
        nb_tokens: Number of tokens in the dataset (i.e. vocabulary size).
        maxlen: Maximum length of a token sequence.
        feature_output: If True the model returns the penultimate
                        feature vector rather than Softmax probabilities
                        (defaults to False).
        embed_dropout_rate: Dropout rate for the embedding layer.
        final_dropout_rate: Dropout rate for the final Softmax layer.
        embed_l2: L2 regularization for the embedding layer.
        return_attention: If True the attention weights over the input
                          tokens are also returned.

    # Returns:
        Model with the given parameters.
    """
    # define embedding layer that turns word tokens into vectors
    # an activation function is used to bound the values of the embedding
    model_input = Input(shape=(maxlen,), dtype='int32')
    embed_reg = L1L2(l2=embed_l2) if embed_l2 != 0 else None
    embed = Embedding(input_dim=nb_tokens, output_dim=256, mask_zero=True,
                      input_length=maxlen, embeddings_regularizer=embed_reg,
                      name='embedding')
    x = embed(model_input)
    x = Activation('tanh')(x)

    # entire embedding channels are dropped out instead of the normal Keras
    # embedding dropout, which drops all channels for entire words
    # many of the datasets contain so few words that losing one or more words
    # can alter the emotions completely
    if embed_dropout_rate != 0:
        embed_drop = SpatialDropout1D(embed_dropout_rate, name='embed_drop')
        x = embed_drop(x)

    # skip-connection from embedding to output eases gradient-flow and allows
    # access to lower-level features
    # ordering of the way the merge is done is important for consistency with
    # the pretrained model
    lstm_0_output = Bidirectional(LSTM(512, return_sequences=True),
                                  name="bi_lstm_0")(x)
    lstm_1_output = Bidirectional(LSTM(512, return_sequences=True),
                                  name="bi_lstm_1")(lstm_0_output)
    x = concatenate([lstm_1_output, lstm_0_output, x])

    # if return_attention is True in AttentionWeightedAverage, an additional
    # tensor representing the weight at each timestep is returned
    weights = None
    x = AttentionWeightedAverage(name='attlayer',
                                 return_attention=return_attention)(x)
    if return_attention:
        x, weights = x

    if not feature_output:
        # output class probabilities
        if final_dropout_rate != 0:
            x = Dropout(final_dropout_rate)(x)

        if nb_classes > 2:
            outputs = [Dense(nb_classes, activation='softmax',
                             name='softmax')(x)]
        else:
            outputs = [Dense(1, activation='sigmoid', name='softmax')(x)]
    else:
        # output penultimate feature vector
        outputs = [x]

    if return_attention:
        # add the attention weights to the outputs if required
        outputs.append(weights)

    return Model(inputs=[model_input], outputs=outputs, name="DeepMoji")
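# A minimal sketch (hypothetical sizes and optimizer, not from the original
# file) of instantiating the architecture for a 5-class problem.
model = deepmoji_architecture(nb_classes=5, nb_tokens=50000, maxlen=30,
                              embed_dropout_rate=0.1, final_dropout_rate=0.5)
model.compile(optimizer='adam', loss='categorical_crossentropy',
              metrics=['accuracy'])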
    df_pred = chart_results(predsteps, predictions, pred_yinv, 1)
    return error_scores
    # loss_arr = np.hstack([np.array(loss[:, 0]), np.array(loss[:, 1])])
    # plot_loss(loss, "Walk Forward Training")
    # plt.show()


repeats = 1
# timesteps = [5, 10, 15, 25, 50]
# regularizers = [L1L2(l1=0.0, l2=0.0), L1L2(l1=0.02, l2=0.0),
#                 L1L2(l1=0.01, l2=0.0), L1L2(l1=0.0, l2=0.01),
#                 L1L2(l1=0.01, l2=0.01)]
# opt = [0.0001, 0.0008, 0.002, 0.005, 0.01, 0.05]
timesteps = [25]
regularizers = [L1L2(l1=0.01, l2=0.01)]
opt = [0.0009]
predsteps = 5
train_pct = 0.90
updates = 2
batch_size = 1
nb_epoch = 20
neurons = 100

results = pd.DataFrame()
df_close = process_data()
diff_values = df_close.apply(difference_pct, args=[1])

for reg in regularizers:
def _prep_embed(self, name="embed"):
    from tensorflow.keras.regularizers import L1, L2, L1L2
    from tensorflow.keras.layers import (Dropout, Dense, BatchNormalization,
                                         ReLU)
    from selectml.tf.layers import (ParallelResidualUnit, ResidualUnit)

    embed_nlayers = getattr(self, f"{name}_nlayers")
    embed_residual = getattr(self, f"{name}_residual")
    embed_nunits = getattr(self, f"{name}_nunits")
    embed_final_nunits = getattr(self, f"{name}_final_nunits")
    embed_0_dropout_rate = getattr(self, f"{name}_0_dropout_rate")
    embed_1_dropout_rate = getattr(self, f"{name}_1_dropout_rate")
    embed_0_l1_rate = getattr(self, f"{name}_0_l1_rate")
    embed_1_l1_rate = getattr(self, f"{name}_1_l1_rate")
    embed_0_l2_rate = getattr(self, f"{name}_0_l2_rate")
    embed_1_l2_rate = getattr(self, f"{name}_1_l2_rate")
    embed_activation = getattr(self, f"{name}_activation")

    if embed_final_nunits is None:
        embed_final_nunits_ = embed_nunits
    else:
        embed_final_nunits_ = embed_final_nunits

    layers = []
    nunits_prev = 0
    for i in range(embed_nlayers):
        dr = embed_0_dropout_rate if (i == 0) else embed_1_dropout_rate
        layers.append(Dropout(dr, name=f"{self.name}/{name}/dropout.{i}"))

        l1 = embed_0_l1_rate if (i == 0) else embed_1_l1_rate
        l2 = embed_0_l2_rate if (i == 0) else embed_1_l2_rate
        if (l1 > 0.0) and (l2 > 0.0):
            reg = L1L2(l1, l2)
        elif (l1 > 0.0):
            reg = L1(l1)
        elif (l2 > 0.0):
            reg = L2(l2)
        else:
            reg = None

        if i >= (embed_nlayers - 1):
            nunits = embed_final_nunits_
        else:
            nunits = embed_nunits

        if embed_residual:
            if nunits != nunits_prev:
                type_ = ParallelResidualUnit
                type_name = "parallel_residual_unit"
            else:
                type_ = ResidualUnit
                type_name = "residual_unit"

            layers.append(
                type_(nunits, dr, activation=embed_activation, use_bias=True,
                      nonlinear_kernel_regularizer=reg,
                      gain_kernel_regularizer=reg,
                      linear_kernel_regularizer=reg,
                      name=f"{self.name}/{name}/{type_name}.{i}"))
        else:
            layers.append(
                Dense(nunits, activation="linear", kernel_regularizer=reg,
                      use_bias=True, name=f"{self.name}/{name}/dense.{i}"))

        layers.append(
            BatchNormalization(
                name=f"{self.name}/{name}/batchnormalization.{i}"))

        # The current trend seems to be to do activation after
        # batch/layer norm. This might change in future.
        if embed_activation == "relu":
            layers.append(ReLU(name=f"{self.name}/{name}/relu.{i}"))

        nunits_prev = nunits
    return layers