def __init__(self, batchSize, epochs):
    # Training params
    self.epochs = epochs
    self.batchSize = batchSize
    self.discriminatorOptimizer = Adadelta(1.0)
    self.combinedOptimizer = Adadelta(0.3)

    # Model params
    self.imgDim = (64, 64)
    self.imgTensorDim = (self.imgDim[0], self.imgDim[1], 1)
    self.latentSpaceDim = (16, 1)
    self.discriminatorDropout = 0.3
    self.generatorDropout = 0.3

    # IO params
    self.imagePipeline = ImagePipeline(batch_size=batchSize, img_size=self.imgDim)
    self.saveEvery = 50
    self.savePath = ".\\GANmodels\\"
    self.tensorBoardLogDir = ".\\tensorboardLogs\\"

    # We generate an image from the same tensor to keep track of the
    # training progress visually.
    self.referenceLatentTensor = self.sampleLatentTensors(1)
    self.modelsReady = False
def __init__(self,
             input_shape,
             number_of_classes,
             filtres=16,
             tailleBlock={'A': 10, 'B': 3, 'C': 3},
             optimiseur='Nadam',
             activation='elu',
             beta=1.1,
             initializer='he_normal',
             metrics=['accuracy'],
             learningR=None,  # 0.0005
             nb_gpu=2):
    get_custom_objects()['swish'] = swish
    get_custom_objects()['e_swish'] = e_swish
    self.input_shape = input_shape
    self.number_of_classes = number_of_classes
    self.filtres = filtres
    self.tailleBlock = tailleBlock
    if learningR is not None:
        self.optimiseur = {
            'SGD': SGD(learning_rate=learningR),
            'RMSprop': RMSprop(learning_rate=learningR),
            'Adagrad': Adagrad(learning_rate=learningR),
            'Adadelta': Adadelta(learning_rate=learningR),
            'Adam': Adam(learning_rate=learningR),
            'Adamax': Adamax(learning_rate=learningR),
            'Nadam': Nadam(learning_rate=learningR),
        }[optimiseur]
    else:
        self.optimiseur = {
            'SGD': SGD(),
            'RMSprop': RMSprop(),
            'Adagrad': Adagrad(),
            'Adadelta': Adadelta(),
            'Adam': Adam(),
            'Adamax': Adamax(),
            'Nadam': Nadam(),
        }[optimiseur]
    self.activation = activation
    self.initializer = initializer
    self.nb_gpu = nb_gpu
    self.metrics = metrics
    # The value 3 means the color channels come last;
    # otherwise -1 (I do not use that syntax).
    self.channel_axis = 3
def prepare_models(self, optimizer=None, loss=None):
    self.autoencoder = Model(self.input, self.decoded)
    self._encoder = Model(self.input, self.encoded)

    # Rebuild a standalone decoder by feeding a latent input through the
    # decoder half of the autoencoder's layer stack.
    self.z_input = Input(shape=(self.latent_dim,))
    self.x_output = self.z_input
    for layer in self.autoencoder.layers[self.encoder_index:]:
        self.x_output = layer(self.x_output)
    self._decoder = Model(self.z_input, self.x_output)

    if optimizer is None:
        self.optimizer = Adadelta(self.learning_rate)
    else:
        self.optimizer = optimizer
    if loss is None:
        self.loss = mse
    else:
        self.loss = loss

    if not self.variational:
        self.autoencoder.compile(optimizer=self.optimizer, loss=self.loss)
    else:
        # VAE loss = reconstruction term + KL divergence term.
        self.reconstruction_loss = self.input_dim * self.loss(
            K.flatten(self.input), K.flatten(self.decoded))
        self.kl_loss = (1 + self.z_log_var - K.square(self.z_mean)
                        - K.exp(self.z_log_var))
        self.kl_loss = -0.5 * K.sum(self.kl_loss, axis=-1)
        self.vae_loss = K.mean(self.reconstruction_loss + self.kl_loss)
        self.autoencoder.add_loss(self.vae_loss)
        self.autoencoder.compile(optimizer=self.optimizer)
def set_optimizer(self, optimizer_name, lr):
    """Select the optimizer.

    Parameters
    ----------
    optimizer_name: str
        name of the optimizer, one of 'adam', 'sgd', 'rmsprop',
        'adagrad', 'adadelta'
    lr: float
        learning rate

    Raises
    ------
    Exception
    """
    if optimizer_name == 'adam':
        optimizer = Adam(lr=lr, beta_1=0.9, beta_2=0.999, epsilon=None,
                         decay=0.0, amsgrad=False)
    elif optimizer_name == 'sgd':
        optimizer = SGD(lr=lr, momentum=0.0, decay=0.0, nesterov=False)
    elif optimizer_name == 'rmsprop':
        optimizer = RMSprop(lr=lr, rho=0.9, epsilon=None, decay=0.0)
    elif optimizer_name == 'adagrad':
        optimizer = Adagrad(lr=lr, epsilon=None, decay=0.0)
    elif optimizer_name == 'adadelta':
        optimizer = Adadelta(lr=lr, rho=0.95, epsilon=None, decay=0.0)
    else:
        raise Exception('Optimizer unknown')
    return optimizer
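# Usage sketch (assumptions: set_optimizer is importable at module level and
# the installed Keras accepts the legacy lr/decay keywords used above;
# DummyTrainer is illustrative only, not part of the original code).
class DummyTrainer:
    pass

DummyTrainer.set_optimizer = set_optimizer  # reuse the method defined above

trainer = DummyTrainer()
opt = trainer.set_optimizer('adadelta', lr=1.0)
print(type(opt).__name__)  # -> Adadelta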
def compile_model(self, optimizer_name='SGD', lr=None):
    # todo: add kwargs to this method
    # Options to try while tuning. Passing learning_rate=None is not a
    # supported way to request the default, so only set it when given.
    optimizer_sgd = SGD(learning_rate=1e-5, momentum=0.0, nesterov=False,
                        name='SGD')  # library default is 0.01
    optimizer_adam = Adam(learning_rate=1e-5, beta_1=0.9, beta_2=0.999,
                          epsilon=1e-07, amsgrad=False, name='Adam')  # 0.001
    if lr is None:
        optimizer_adadelta = Adadelta(rho=0.95, epsilon=1e-07, name='Adadelta')
    else:
        optimizer_adadelta = Adadelta(learning_rate=lr, rho=0.95,
                                      epsilon=1e-07, name='Adadelta')

    # An alternative loss that was considered:
    # loss_function = tf.keras.losses.CategoricalCrossentropy(
    #     from_logits=True, label_smoothing=0,
    #     reduction=losses_utils.ReductionV2.AUTO,
    #     name='categorical_crossentropy')

    optimizer = {'SGD': optimizer_sgd,
                 'Adam': optimizer_adam,
                 'Adadelta': optimizer_adadelta}[optimizer_name]
    self.model.compile(
        loss='sparse_categorical_crossentropy',
        # Adadelta adapts learning rates based on a moving window of gradient
        # updates instead of accumulating all past gradients, so it keeps
        # learning even after many updates have been done.
        optimizer=optimizer,
        # We might prefer F1, precision, or a cross-entropy metric here.
        metrics=['accuracy'])
    self.model.summary()
def get_model(self):
    self.vocabulary_size = self.vectorizer.get_vocabulary_size()
    self.embedding_matrix = self.vectorizer.get_embedding_matrix()
    embedding = Embedding(
        self.vocabulary_size,
        self.embedding_size,
        mask_zero=False,
        trainable=True,
        weights=None if self.embedding_matrix is None else [self.embedding_matrix])
    self.question_input, self.question_output = self.get_question_output(
        embedding)
    self.sentence_model = self.get_sentence_model(
        embedding,
        question_input=self.question_input,
        question_output=self.question_output,
        use_attention=True)
    self.section_model = self.get_section_model(
        self.sentence_model,
        question_input=self.question_input,
        question_output=self.question_output)
    self.document_model = self.get_document_model(self.section_model,
                                                  self.question_output)
    optimizer = Adadelta()
    loss_metrics = "binary_crossentropy"
    self.document_model.compile(loss=loss_metrics,
                                optimizer=optimizer,
                                metrics=[loss_metrics])
    self.document_model.summary()
def create_model(batch_size, dropout=0.0, recurrent_state_dropout=0.0):
    model = Sequential()
    # model.add(LSTM(128,
    #                return_sequences=True,
    #                batch_input_shape=(batch_size, 3197, 1),
    #                dropout=dropout,
    #                recurrent_dropout=recurrent_state_dropout,
    #                stateful=True))
    model.add(LSTM(10,
                   return_sequences=True,
                   dropout=dropout,
                   batch_input_shape=(batch_size, 3197, 1),
                   recurrent_dropout=recurrent_state_dropout,
                   stateful=True))
    model.add(Flatten())
    model.add(Dense(1))
    ada = Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0)
    model.compile(loss='binary_crossentropy', optimizer=ada, metrics=[])
    return model
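# Usage sketch (the random 3197-step sequences below are illustrative only).
# Because the LSTM is stateful, the batch size is baked into the model, every
# fit() batch must have exactly that size, and states should be reset
# between epochs.
import numpy as np

batch_size = 32
model = create_model(batch_size, dropout=0.2, recurrent_state_dropout=0.2)
x = np.random.rand(batch_size * 4, 3197, 1)
y = np.random.randint(0, 2, size=(batch_size * 4, 1))
for epoch in range(3):
    model.fit(x, y, batch_size=batch_size, epochs=1, shuffle=False)
    model.reset_states()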
def __init__(self, height, width):
    self.row = height
    self.column = width
    # Regularization term
    self.reg = 1e-4
    self.pi = None
    self.v = None

    states = Input(shape=(self.row, self.column, 2))
    print('Initializing model:')
    conv = self.conv_block(states)
    print('conv:', conv.shape)
    res = self.res_block(conv, Config.res_blocks)
    print('res', res.shape)
    self.pi = self.policy_head(res)
    print('pi', self.pi.shape)
    self.v = self.value_head(res)
    print('v', self.v.shape)

    self.model = Model(inputs=states, outputs=[self.pi, self.v])
    self.model.compile(optimizer=Adadelta(),
                       loss=[categorical_crossentropy, mean_squared_error],
                       loss_weights=[0.5, 0.5],
                       metrics=["accuracy"])
def _get_model(self):
    """Initializes keras sequential neural network model"""
    self.model = Sequential()
    self.model.add(ConvLSTM2D(filters=32,
                              kernel_size=(5, 5),
                              input_shape=(self.seq_len,
                                           self.processor.lat_len,
                                           self.processor.lon_len,
                                           self.processor.channel),
                              data_format='channels_last',
                              padding='same',
                              return_sequences=True))
    self.model.add(MaxPool3D(pool_size=(1, 4, 4),
                             padding='valid',
                             data_format='channels_last'))
    self.model.add(ConvLSTM2D(filters=16,
                              kernel_size=(3, 3),
                              data_format='channels_last',
                              padding='same',
                              return_sequences=True))
    self.model.add(MaxPool3D(pool_size=(1, 4, 4),
                             padding='valid',
                             data_format='channels_last'))
    self.model.add(Flatten())
    self.model.add(Dense(256))
    self.model.add(Dropout(0.2))
    self.model.add(Dense(16))
    self.model.add(Dropout(0.2))
    self.model.add(Dense(len(self.processor.target_levels)))

    loss = binary_crossentropy
    opt = Adadelta()
    mets = categorical_accuracy
    self.model.compile(loss=loss, optimizer=opt, metrics=[mets])
def DefineModel():
    # Here we build the network model.
    # This model is made of multiple parts. The first handles the
    # inputs and identifies common features. The rest are branches with
    # each determining an output parameter from those features.
    inputs = Input(shape=(NINPUTS, 1), name='waveform')
    pedmodel = DefinePedModel(inputs)
    timmodel = DefineTimeModel(inputs)
    ampmodel = DefineAmplitudeModel(inputs, pedmodel, timmodel)
    #commonoutput = DefineCommonOutput([pedmodel, ampmodel, timmodel])

    model = Model(inputs=inputs, outputs=[pedmodel, ampmodel, timmodel])
    #model = Model(inputs=inputs, outputs=commonoutput)

    #loss_weights = {'ped_output': 1.0/1.0, 'amp_output': 1.0/200.0, 'time_output': 1.0/40.0}
    loss_weights = {
        'ped_output': 1.0 / 5.0,
        'amp_output': 1.0 / 200.0,
        'time_output': 1.0 / 40.0
    }

    # Compile the model, possibly using multiple GPUs
    #opt = Adam(0.001)
    #opt = Adamax(0.0005)
    #opt = Adadelta(learning_rate=0.01, rho=0.98, clipnorm=1.0)
    opt = Adadelta(learning_rate=0.01, rho=0.98)
    #opt = SGD()
    model.compile(loss='mse', loss_weights=loss_weights, optimizer=opt,
                  metrics=['mae', 'mse'])
    return model
def init_model(input_shape):
    """
    Returns the built and compiled Keras model
    :param input_shape: tuple(num_frequency_bins, num_time_frames, num_channels)
    :return: keras model
    """
    num_classes = len(cf.dataset.classes)

    # Construct model
    model = Sequential(name='spectrum_cnn_3')
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu',
                     input_shape=input_shape))
    model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Conv2D(128, kernel_size=(3, 3), strides=(2, 1), activation='relu'))
    model.add(Conv2D(256, kernel_size=(3, 3), strides=(2, 1), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    # Compile model
    model.compile(loss=categorical_crossentropy,
                  optimizer=Adadelta(learning_rate=1.0),
                  metrics=['accuracy'])
    return model
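# Usage sketch (assumptions: the module-level `cf` config from this file is
# available, and the spectrogram dimensions below are made up). Note that
# Keras' input_shape excludes the batch dimension.
import numpy as np

model = init_model(input_shape=(128, 64, 1))
dummy_batch = np.random.rand(8, 128, 64, 1).astype('float32')
predictions = model.predict(dummy_batch)  # shape: (8, num_classes)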
def get_model(class_names):
    model_base = tensorflow.keras.applications.xception.Xception(
        include_top=False, input_shape=(71, 71, 3), weights='imagenet')
    output = Flatten()(model_base.output)
    output = BatchNormalization()(output)
    output = Dropout(0.5)(output)
    output = Dense(128, activation='relu')(output)
    output = BatchNormalization()(output)
    output = Dropout(0.5)(output)
    output = Dense(len(class_names), activation='softmax')(output)
    model = Model(model_base.input, output)
    for layer in model_base.layers:
        layer.trainable = True
    model.summary(line_length=200)

    # Workaround so plot_model does not fail when it probes for Graphviz.
    import pydot
    pydot.find_graphviz = lambda: True
    from tensorflow.keras.utils import plot_model
    plot_model(model, show_shapes=True,
               to_file='C:/CAR/LOG/model_pdfs/{}.pdf'.format('Xception'))

    ada = Adadelta(lr=0.1, rho=0.95, epsilon=1e-08)
    model.compile(optimizer=ada,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
def model_v2(input_shape=(16, 16, 16, 3), classQTY=10, rate=0.05):
    input_layer = Input(input_shape)
    X = Conv3D(filters=8, kernel_size=(3, 3, 3), activation='relu')(input_layer)
    X = Conv3D(filters=16, kernel_size=(3, 3, 3), activation='relu')(X)
    X = MaxPool3D(pool_size=(2, 2, 2))(X)
    X = Conv3D(filters=24, kernel_size=(3, 3, 3), activation='relu')(X)
    X = Conv3D(filters=32, kernel_size=(3, 3, 3), activation='relu')(X)
    X = MaxPool3D(pool_size=(2, 2, 2))(X)
    X = Flatten()(X)
    X = Dense(units=2048, activation='relu')(X)
    X = Dropout(0.4)(X)
    X = Dense(units=512, activation='relu')(X)
    X = Dropout(0.4)(X)
    output_layer = Dense(units=classQTY, activation='softmax')(X)

    model = Model(inputs=input_layer, outputs=output_layer, name='3DCNN_v2')
    #opt = SGD(lr=0.005, momentum=0.9)
    #model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    model.compile(optimizer=Adadelta(lr=0.05),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
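# Usage sketch (the random volumes below are illustrative only): build the
# 3D CNN for 16x16x16x3 inputs and run a forward pass.
import numpy as np

model = model_v2(input_shape=(16, 16, 16, 3), classQTY=10)
volumes = np.random.rand(4, 16, 16, 16, 3).astype('float32')
probs = model.predict(volumes)  # shape: (4, 10); rows sum to ~1 (softmax)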
def create_model(input_shape):
    """Create a convolutional neural network and an image data generator.

    Build and compile a sequential CNN for handwritten digits recognition.
    The returned CNN is ready for training. Also create an image data
    generator to train the model on.
    """
    # Model building
    model = Sequential([
        layers.Conv2D(32, kernel_size=(3, 3), activation="relu",
                      input_shape=input_shape),
        layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Dropout(0.25),
        layers.Flatten(),
        layers.Dense(128, activation="relu"),
        layers.Dropout(0.5),
        layers.Dense(10, activation="softmax")
    ])

    # Model compilation
    model.compile(loss=categorical_crossentropy,
                  optimizer=Adadelta(),
                  metrics=['accuracy'])

    # Image data generator
    datagen = ImageDataGenerator(rotation_range=10,
                                 width_shift_range=0.1,
                                 height_shift_range=0.1,
                                 shear_range=0.2,
                                 zoom_range=0.1,
                                 fill_mode='nearest')
    return (model, datagen)
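# Usage sketch: training the digit CNN above on MNIST through the augmentation
# generator. The epoch count and batch size below are arbitrary choices.
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

(x_train, y_train), _ = mnist.load_data()
x_train = x_train.reshape(-1, 28, 28, 1).astype('float32') / 255.0
y_train = to_categorical(y_train, 10)

model, datagen = create_model(input_shape=(28, 28, 1))
model.fit(datagen.flow(x_train, y_train, batch_size=128), epochs=1)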
def init_nn(self, input_size, hidden_layers_dim):
    """Initializes the neural sequential model by adding layers and compiling
    the model. There is no call to fit(), because the eligibilities need to be
    applied to the gradients before the gradients can be used to update the
    model weights. This is done in split-gd."""
    # Adagrad is well suited to sparse data; Adadelta is an extension that
    # solves Adagrad's problem of a shrinking learning rate.
    opt = Adadelta(learning_rate=self.learning_rate)
    # Larger errors should be penalized more than smaller ones.
    loss = MeanSquaredError()
    model = KER.models.Sequential()
    # The input layer expects a one-dimensional array with input_size
    # elements; this builds the network automatically.
    model.add(KER.layers.Dense(input_size, activation="relu",
                               input_shape=(input_size,)))
    for i in range(len(hidden_layers_dim)):
        # relu gives quick convergence.
        model.add(KER.layers.Dense(hidden_layers_dim[i], activation="relu"))
    # Observation: no activation function gives quicker convergence
    # (could use linear).
    model.add(KER.layers.Dense(1))
    # MSE is one of the most preferred metrics for regression tasks.
    model.compile(optimizer=opt, loss=loss, metrics=["mean_squared_error"])
    # model.summary()
    return model
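# Usage sketch (assumption: init_nn lives on an actor-critic class that
# defines self.learning_rate; DummyCritic below is illustrative only).
class DummyCritic:
    learning_rate = 1.0
    init_nn = init_nn  # reuse the method defined above

critic = DummyCritic()
value_net = critic.init_nn(input_size=16, hidden_layers_dim=[32, 16])
value_net.summary()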
def multitask_attention_model(output_size, pos_vocab_size, lex_vocab_size,
                              config_params, visualize=False, plot=False):
    hidden_size = int(config_params['hidden_size'])
    batch_size = int(config_params['batch_size'])
    embedding_size = 768
    max_seq_len = 512

    in_id = Input(shape=(max_seq_len,), name="input_ids")
    in_mask = Input(shape=(max_seq_len,), name="input_masks")
    in_segment = Input(shape=(max_seq_len,), name="segment_ids")
    bert_inputs = [in_id, in_mask, in_segment]

    bert_output_ = BertEmbeddingLayer(n_fine_tune_layers=3,
                                      pooling="mean")(bert_inputs)
    bert_output = Reshape((max_seq_len, embedding_size))(bert_output_)

    # Use a distinct name for the candidate-synsets mask so it does not
    # shadow BERT's attention mask above.
    in_synset_mask = Input(shape=(None, output_size), batch_size=batch_size,
                           name='Candidate_Synsets_Mask')
    bert_inputs.append(in_synset_mask)

    bilstm = Bidirectional(LSTM(hidden_size,
                                dropout=0.2,
                                recurrent_dropout=0.2,
                                return_sequences=True,
                                input_shape=(None, None, embedding_size)),
                           merge_mode='sum')(bert_output)
    attention = SeqSelfAttention(units=128,
                                 attention_activation='sigmoid',
                                 name='Attention')(bilstm)

    logits = TimeDistributed(Dense(output_size))(attention)
    logits_mask = Add()([logits, in_synset_mask])
    pos_logits = TimeDistributed(Dense(pos_vocab_size),
                                 name='POS_logits')(attention)
    lex_logits = TimeDistributed(Dense(lex_vocab_size),
                                 name='LEX_logits')(attention)

    wsd_output = Softmax(name="WSD_output")(logits_mask)
    pos_output = Softmax(name="POS_output")(pos_logits)
    lex_output = Softmax(name="LEX_output")(lex_logits)

    model = Model(inputs=bert_inputs,
                  outputs=[wsd_output, pos_output, lex_output],
                  name='Bert_BiLSTM_ATT_MultiTask')
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=Adadelta(),
                  metrics=['acc'])

    visualize_plot_mdl(visualize, plot, model)
    return model
def __init__(self, emdim, max_passage_length=None, max_query_length=None,
             num_highway_layers=2, num_decoders=1, encoder_dropout=0,
             decoder_dropout=0):
    self.emdim = emdim
    self.max_passage_length = max_passage_length
    self.max_query_length = max_query_length

    passage_input = Input(shape=(self.max_passage_length, emdim),
                          dtype='float32', name="passage_input")
    question_input = Input(shape=(self.max_query_length, emdim),
                           dtype='float32', name="question_input")

    question_embedding = question_input
    passage_embedding = passage_input
    for i in range(num_highway_layers):
        highway_layer = Highway(name='highway_{}'.format(i))
        question_layer = TimeDistributed(highway_layer,
                                         name=highway_layer.name + "_qtd")
        question_embedding = question_layer(question_embedding)
        passage_layer = TimeDistributed(highway_layer,
                                        name=highway_layer.name + "_ptd")
        passage_embedding = passage_layer(passage_embedding)

    encoder_layer = Bidirectional(LSTM(emdim,
                                       recurrent_dropout=encoder_dropout,
                                       return_sequences=True),
                                  name='bidirectional_encoder')
    encoded_question = encoder_layer(question_embedding)
    encoded_passage = encoder_layer(passage_embedding)

    similarity_matrix = Similarity(name='similarity_layer')(
        [encoded_passage, encoded_question])
    context_to_query_attention = C2QAttention(name='context_to_query_attention')(
        [similarity_matrix, encoded_question])
    query_to_context_attention = Q2CAttention(name='query_to_context_attention')(
        [similarity_matrix, encoded_passage])
    merged_context = MergedContext(name='merged_context')(
        [encoded_passage, context_to_query_attention, query_to_context_attention])

    modeled_passage = merged_context
    for i in range(num_decoders):
        hidden_layer = Bidirectional(LSTM(emdim,
                                          recurrent_dropout=decoder_dropout,
                                          return_sequences=True),
                                     name='bidirectional_decoder_{}'.format(i))
        modeled_passage = hidden_layer(modeled_passage)

    span_begin_probabilities = SpanBegin(name='span_begin')(
        [merged_context, modeled_passage])
    span_end_probabilities = SpanEnd(name='span_end')(
        [encoded_passage, merged_context, modeled_passage,
         span_begin_probabilities])
    output = CombineOutputs(name='combine_outputs')(
        [span_begin_probabilities, span_end_probabilities])

    model = Model([passage_input, question_input], [output])
    model.summary()

    # Fall back to the single-GPU model if multi-GPU wrapping is unavailable.
    try:
        model = ModelMGPU(model)
    except Exception:
        pass

    adadelta = Adadelta(lr=0.01)
    model.compile(loss=negative_avg_log_error, optimizer=adadelta,
                  metrics=[accuracy])
    self.model = model
def compile(self, **kwargs):
    # Default optimizer and loss, overridable via kwargs.
    optimizer = kwargs.pop('optimizer', Adadelta())
    loss = kwargs.pop('loss', 'mean_squared_error')
    self.model.compile(optimizer=optimizer, loss=loss, **kwargs)
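# Usage sketch (assumption: the compile method above lives on a wrapper class
# exposing self.model; `net` below is an illustrative instance, not from the
# original code). Defaults apply unless overridden:
#
#   net.compile()                                   # Adadelta + mean_squared_error
#   net.compile(optimizer='adam', loss='mae',
#               metrics=['mse'])                    # extra kwargs pass through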
def _build_dqn_model(_env_st_size, _env_ac_size, _learning_rate, _layers,
                     _optimizer, _initializer, _name=None):
    """
    Builds a deep neural net which predicts the Q values for all possible
    actions given a state. The input should have the shape of the state, and
    the output should have the same shape as the action space, since we want
    one Q value per possible action.
    :return: Q network
    """
    state_size = _env_st_size
    action_size = _env_ac_size

    n_network = Sequential(name=_name)
    # Build with the given number of layers.
    n_network.add(Dense(_layers[0],
                        input_dim=state_size,
                        activation='relu',
                        kernel_initializer=_initializer))
    for l in range(1, len(_layers)):
        n_network.add(Dense(_layers[l],
                            activation='relu',
                            kernel_initializer=_initializer))
    # Output layer with fixed (action_size) output size.
    n_network.add(Dense(action_size,
                        activation='linear',
                        kernel_initializer=_initializer))

    if _optimizer == "RMSprop":
        n_network.compile(loss=custom_loss,
                          optimizer=RMSprop(lr=_learning_rate),
                          metrics=['mae'])
    elif _optimizer == "SGD":
        n_network.compile(loss=custom_loss,
                          optimizer=SGD(lr=_learning_rate),
                          metrics=['mae'])
    elif _optimizer == "Adam":
        n_network.compile(loss=custom_loss,
                          optimizer=Adam(lr=_learning_rate),
                          metrics=['mae'])
    elif _optimizer == "Adadelta":
        n_network.compile(loss=custom_loss,
                          optimizer=Adadelta(lr=_learning_rate),
                          metrics=['mae'])
    return n_network
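# Usage sketch (assumptions: custom_loss is defined elsewhere in this module;
# the CartPole-like sizes below are illustrative). Builds a Q-network with two
# hidden layers and the Adadelta branch above.
import numpy as np

q_net = _build_dqn_model(_env_st_size=4, _env_ac_size=2,
                         _learning_rate=1.0, _layers=[64, 64],
                         _optimizer="Adadelta",
                         _initializer="he_uniform",
                         _name="q_network")
q_values = q_net.predict(np.random.rand(1, 4))  # one Q value per action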
def attention_model(vocabulary_size, config_params, output_size,
                    pos_vocab_size, lex_vocab_size, visualize=False,
                    plot=False, tokenizer=None):
    hidden_size = int(config_params['hidden_size'])
    batch_size = int(config_params['batch_size'])
    input_type = 'string' if tokenizer is not None else None

    in_sentences = Input(shape=(None,), dtype=input_type,
                         batch_size=batch_size)
    if tokenizer is not None:
        embedding = ElmoEmbeddingLayer()(in_sentences)
        embedding_size = 1024
    else:
        embedding_size = int(config_params['embedding_size'])
        embedding = Embedding(input_dim=vocabulary_size,
                              output_dim=embedding_size,
                              mask_zero=True,
                              name="Embeddings")(in_sentences)

    bilstm = Bidirectional(LSTM(hidden_size,
                                dropout=0.2,
                                recurrent_dropout=0.2,
                                return_sequences=True,
                                input_shape=(None, None, embedding_size)),
                           merge_mode='sum')(embedding)
    attention = SeqSelfAttention(attention_activation='sigmoid',
                                 name='Attention')(bilstm)
    logits = TimeDistributed(Dense(output_size))(attention)

    in_mask = Input(shape=(None, output_size), batch_size=batch_size,
                    name='Candidate_Synsets_Mask')
    logits_mask = Add()([logits, in_mask])

    pos_logits = TimeDistributed(Dense(pos_vocab_size),
                                 name='POS_logits')(attention)
    lex_logits = TimeDistributed(Dense(lex_vocab_size),
                                 name='LEX_logits')(attention)

    wsd_output = Softmax(name="WSD_output")(logits_mask)
    pos_output = Softmax(name="POS_output")(pos_logits)
    lex_output = Softmax(name="LEX_output")(lex_logits)

    model = Model(inputs=[in_sentences, in_mask],
                  outputs=[wsd_output, pos_output, lex_output],
                  name='BiLSTM_ATT_MultiTask')
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=Adadelta(),
                  metrics=['acc'])

    visualize_plot_mdl(visualize, plot, model)
    return model
def compile(self, gamma=0.1, loss=['mse'], *args, **kwargs):
    #optimizer = Adam(lr=0.01)
    optimizer = Adadelta(lr=0.1)
    clustering_loss = [self.ss_loss()]  # ['kld']
    #optimizer = 'adadelta'
    self._model.compile(
        # Capture multi-output models: the clustering loss applies to the
        # first output, the given loss to all remaining outputs.
        loss=clustering_loss + loss * (len(self._model.outputs) - 1),
        loss_weights=[gamma] + [1. for _ in range(len(self._model.outputs) - 1)],
        optimizer=optimizer)
def compile_model(self, verbose=1):
    self.model.compile(loss='categorical_crossentropy',
                       optimizer=Adadelta(),
                       metrics=['accuracy'])
    self.model.fit(self.X_train,
                   self.y_train,
                   batch_size=self.batch_size,
                   epochs=self.epochs,
                   verbose=verbose,
                   validation_data=(self.X_test, self.y_test))
def baseline_model(vocabulary_size, config_params, output_size,
                   tokenizer=None, visualize=False, plot=False):
    name = 'Baseline'
    hidden_size = int(config_params['hidden_size'])
    batch_size = int(config_params['batch_size'])
    input_type = 'string' if tokenizer is not None else None

    in_sentences = Input(shape=(None,), dtype=input_type,
                         batch_size=batch_size, name='Input')
    if tokenizer is not None:
        embedding = ElmoEmbeddingLayer()(in_sentences)
        embedding_size = 1024
        name = f'Elmo_{name}'
    else:
        embedding_size = int(config_params['embedding_size'])
        embedding = Embedding(input_dim=vocabulary_size,
                              output_dim=embedding_size,
                              mask_zero=True,
                              name="Embeddings")(in_sentences)

    bilstm = Bidirectional(LSTM(hidden_size,
                                dropout=0.2,
                                recurrent_dropout=0.2,
                                return_sequences=True,
                                input_shape=(None, None, embedding_size)),
                           merge_mode='sum')(embedding)
    logits = TimeDistributed(Dense(output_size))(bilstm)

    in_mask = Input(shape=(None, output_size), batch_size=batch_size,
                    name='Candidate_Synsets_Mask')
    logits_mask = Add()([logits, in_mask])
    output = Softmax()(logits_mask)

    model = Model(inputs=[in_sentences, in_mask], outputs=output, name=name)
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=Adadelta(),
                  metrics=['acc'])

    visualize_plot_mdl(visualize, plot, model)
    return model
def attention_model(vocabulary_size, config_params, output_size, weights=None,
                    tokenizer=None, visualize=False, plot=False):
    hidden_size = int(config_params['hidden_size'])
    batch_size = int(config_params['batch_size'])
    input_type = 'string' if tokenizer is not None else None

    in_sentences = Input(shape=(None,), dtype=input_type,
                         batch_size=batch_size)
    in_mask = Input(shape=(None, output_size), batch_size=batch_size,
                    name='Candidate_Synsets_Mask')

    if tokenizer is not None:
        embedding = ElmoEmbeddingLayer()(in_sentences)
        embedding_size = 1024
    elif weights is not None:
        embedding_size = weights.shape[1]
        train = False  # whether to fine-tune the pretrained embeddings
        embedding = Embedding(input_dim=output_size,
                              output_dim=embedding_size,
                              weights=[weights],
                              trainable=train,
                              mask_zero=True)(in_sentences)
    else:
        embedding_size = int(config_params['embedding_size'])
        embedding = Embedding(input_dim=vocabulary_size,
                              output_dim=embedding_size,
                              mask_zero=True,
                              name="Embeddings")(in_sentences)

    bilstm = Bidirectional(LSTM(hidden_size,
                                dropout=0.2,
                                recurrent_dropout=0.2,
                                return_sequences=True,
                                input_shape=(None, None, embedding_size)),
                           merge_mode='sum')(embedding)
    attention = SeqSelfAttention(attention_activation='sigmoid',
                                 name='Attention')(bilstm)
    logits = TimeDistributed(Dense(output_size))(attention)
    logits_mask = Add()([logits, in_mask])
    output = Softmax()(logits_mask)

    model = Model(inputs=[in_sentences, in_mask], outputs=output,
                  name="SensEmbed_Attention")
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=Adadelta(),
                  metrics=['acc'])

    visualize_plot_mdl(visualize, plot, model)
    return model
def __init__(self, output_dir, key):
    # Variables to hold the description of the experiment
    self.config_description = "This is the template config file."

    # System dependent variables
    self._workers = 1
    self._multiprocessing = False
    self._gpus = 1
    self._displayer = MNISTDisplayer()

    # Variables for comet.ml
    self._project_name = "my_project"
    self._workspace = "my_workspace"
    self.output_dir = join(output_dir,
                           "{}_{}_{}".format(self.workspace,
                                             self.project_name, key))

    # Network variables
    self.num_classes = 10
    self.img_size = (28, 28)
    self._weights = None
    self._network = MNISTExample(self.num_classes)

    # Training variables
    self._epochs = 5
    self._batch_size = 128
    self._steps_per_epoch = 60000 // 128
    self._optimizer = Adadelta()
    self._loss = categorical_crossentropy
    self._metrics = ['accuracy']

    self._callbacks = []
    self.early_stopping_params = {"monitor": 'val_loss',
                                  "min_delta": 0,
                                  "patience": 7}
    self.reduce_lr_on_plateau_params = {"monitor": 'val_loss',
                                        "factor": 0.1,
                                        "patience": 5}
    self.tensorboard = TensorBoard(join(self.output_dir, "checkpoints/logs"))
    self.terminate_on_nan = TerminateOnNaN()
    self.early_stopping = EarlyStopping(**self.early_stopping_params)
    self.reduce_lr_on_plateau = ReduceLROnPlateau(**self.reduce_lr_on_plateau_params)
    self.model_checkpoint = ModelCheckpoint(
        filepath=join(self.output_dir, "checkpoints",
                      "cp-{epoch:04d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.ckpt"),
        verbose=1,
        save_best_only=True,
        save_weights_only=True)
    self._callbacks = [self.tensorboard,
                       self.terminate_on_nan,
                       self.early_stopping,
                       self.reduce_lr_on_plateau,
                       self.model_checkpoint]

    # Creating the training and validation generators
    # (you may want to move these to the prepare functions)
    train_data, validation_data = mnist.load_data()
    self._train_generator = MNISTGenerator(train_data, self.batch_size)
    self._validation_generator = MNISTGenerator(validation_data, self.batch_size)
    # Dummy test generator for the example
    self._test_generator = MNISTGenerator(validation_data, self.batch_size)
    self._evaluator = None
def get_optimizer(self) -> Optimizer:
    """
    Returns the configured optimizer for this configuration
    :return:
    """
    if self.optimizer == "SGD":
        return SGD(lr=self.learning_rate,
                   momentum=self.nesterov_momentum,
                   nesterov=True)
    if self.optimizer == "Adam":
        return Adam()
    if self.optimizer == "Adadelta":
        return Adadelta()
    raise Exception("Invalid optimizer {0} requested".format(self.optimizer))
def get_optimizer(optim="adam", learning_rate=1e-3): if optim == "adam": return Adam(learning_rate=learning_rate) elif optim == "adagrad": return Adagrad(learning_rate=learning_rate) elif optim == "sgd": return SGD(learning_rate=learning_rate) elif optim == "rmsprop": return RMSprop(learning_rate=learning_rate) elif optim == "adadelta": return Adadelta(learning_rate=learning_rate) else: logger.error(f"Invalid optim {optim}") os._exit(0)
def train_dense_model(x, y):
    # num_categories, batch_size and epochs are module-level settings.
    model = Sequential()
    model.add(Dense(64, activation='relu', input_shape=(128,)))
    model.add(Dropout(0.5))
    model.add(Dense(32, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_categories, activation='softmax'))
    model.compile(Adadelta(),
                  loss=categorical_crossentropy,
                  metrics=['accuracy'])
    model.fit(x, y, batch_size=batch_size, epochs=epochs)
    return model
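# Usage sketch (assumptions: random 128-dimensional features and one-hot
# labels stand in for real data; num_categories is taken to be 10 here).
import numpy as np
from tensorflow.keras.utils import to_categorical

x = np.random.rand(256, 128).astype('float32')
y = to_categorical(np.random.randint(0, 10, size=256), 10)
model = train_dense_model(x, y)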
def get_optimizer(optimizer):
    if optimizer == "sgd":
        return SGD(learning_rate=0.01,
                   decay=1e-6,
                   momentum=0.9,
                   nesterov=True,
                   clipnorm=5)
    if optimizer == "rmsprop":
        return RMSprop(learning_rate=0.01)
    if optimizer == "adam":
        return Adam(learning_rate=0.01)
    if optimizer == "adagrad":
        return Adagrad(learning_rate=0.01)
    if optimizer == "adadelta":
        return Adadelta(learning_rate=1.0)
    raise ValueError("Unknown optimizer: {}".format(optimizer))
def get_model(self):
    embedding = Embedding(5000, 300, mask_zero=True, trainable=True)
    self.question_model = self.get_text_model(embedding)
    self.sentence_model = self.get_text_model(embedding, use_attention=True)
    self.section_model = self.get_section_model(self.sentence_model,
                                                self.question_model)
    self.document_model = self.get_document_model(self.section_model,
                                                  self.question_model)
    optimizer = Adadelta()
    loss_metrics = "binary_crossentropy"
    self.document_model.compile(loss=loss_metrics,
                                optimizer=optimizer,
                                metrics=[loss_metrics])
    self.document_model.summary()