Example #1
    def __init__(self, batchSize, epochs):

        # Training params
        self.epochs = epochs
        self.batchSize = batchSize
        self.discriminatorOptimizer = Adadelta(1.0)
        self.combinedOptimizer = Adadelta(0.3)

        # Model params
        self.imgDim = (64, 64)
        self.imgTensorDim = (self.imgDim[0], self.imgDim[1], 1)
        self.latentSpaceDim = (16, 1)
        self.discriminatorDropout = 0.3
        self.generatorDropout = 0.3

        # IO params
        self.imagePipeline = ImagePipeline(batch_size=batchSize,
                                           img_size=self.imgDim)
        self.saveEvery = 50
        self.savePath = ".\\GANmodels\\"
        self.tensorBoardLogDir = ".\\tensorboardLogs\\"
        # We generate an image from the same tensor to keep track of the training progress visually
        self.referenceLatentTensor = self.sampleLatentTensors(1)

        self.modelsReady = False
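
A minimal sketch of where these two optimizers would plug in, assuming hypothetical `self.discriminator` and `self.combined` models built elsewhere in this class (not shown above):

        # hedged sketch; both models are assumptions, not part of the snippet
        self.discriminator.compile(loss='binary_crossentropy',
                                   optimizer=self.discriminatorOptimizer,
                                   metrics=['accuracy'])
        self.combined.compile(loss='binary_crossentropy',
                              optimizer=self.combinedOptimizer)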
Example #2
    def __init__(
            self,
            input_shape,
            number_of_classes,
            filtres=16,
            tailleBlock={
                'A': 10,
                'B': 3,
                'C': 3
            },
            optimiseur='Nadam',
            activation='elu',
            beta=1.1,
            initializer='he_normal',
            metrics=['accuracy'],
            learningR=None,  #0.0005,
            nb_gpu=2):

        get_custom_objects()['swish'] = swish
        get_custom_objects()['e_swish'] = e_swish

        self.input_shape = input_shape
        self.number_of_classes = number_of_classes
        self.filtres = filtres
        self.tailleBlock = tailleBlock

        self.optimiseur = optimiseur
        if learningR is not None:
            self.optimiseur = {
                'SGD': SGD(learning_rate=learningR),
                'RMSprop': RMSprop(learning_rate=learningR),
                'Adagrad': Adagrad(learning_rate=learningR),
                'Adadelta': Adadelta(learning_rate=learningR),
                'Adam': Adam(learning_rate=learningR),
                'Adamax': Adamax(learning_rate=learningR),
                'Nadam': Nadam(learning_rate=learningR),
            }[optimiseur]
        else:
            self.optimiseur = {
                'SGD': SGD(),
                'RMSprop': RMSprop(),
                'Adagrad': Adagrad(),
                'Adadelta': Adadelta(),
                'Adam': Adam(),
                'Adamax': Adamax(),
                'Nadam': Nadam(),
            }[optimiseur]

        self.activation = activation
        self.initializer = initializer
        self.nb_gpu = nb_gpu
        self.metrics = metrics

        # the value 3 indicates that the color channels come last
        # (otherwise -1; I don't use that syntax)
        self.channel_axis = 3
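
As written, each dict literal instantiates all seven optimizers and keeps only one. A lazier, equivalent dispatch (a sketch, assuming the same optimizer classes imported above) maps names to classes and constructs only the requested optimizer:

OPTIMIZER_CLASSES = {'SGD': SGD, 'RMSprop': RMSprop, 'Adagrad': Adagrad,
                     'Adadelta': Adadelta, 'Adam': Adam, 'Adamax': Adamax,
                     'Nadam': Nadam}

def make_optimizer(name, learning_rate=None):
    # build only the requested optimizer; fall back to the class
    # defaults when no learning rate is given
    cls = OPTIMIZER_CLASSES[name]
    return cls(learning_rate=learning_rate) if learning_rate is not None else cls()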
Example #3
    def prepare_models(self, optimizer=None, loss=None):

        self.autoencoder = Model(self.input, self.decoded)
        self._encoder = Model(self.input, self.encoded)
        self.z_input = Input(shape=(self.latent_dim,))
        self.x_output = self.z_input
        for layer in self.autoencoder.layers[self.encoder_index:]:
            self.x_output = layer(self.x_output)
        
        self._decoder = Model(self.z_input, self.x_output)
        
        if optimizer is None:
            self.optimizer = Adadelta(self.learning_rate)
        else:
            self.optimizer = optimizer
            
        if loss is None:
            self.loss = mse
        else:
            self.loss = loss
            
        if not self.variational:
            self.autoencoder.compile(optimizer=self.optimizer, loss=self.loss)
        else:
            self.reconstruction_loss = self.input_dim*self.loss(K.flatten(self.input), K.flatten(self.decoded))
            self.kl_loss = 1 + self.z_log_var - K.square(self.z_mean) - K.exp(self.z_log_var)
            self.kl_loss = -0.5*K.sum(self.kl_loss, axis=-1)
            self.vae_loss = K.mean(self.reconstruction_loss + self.kl_loss)
            self.autoencoder.add_loss(self.vae_loss)
            self.autoencoder.compile(optimizer=self.optimizer)
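
A hedged usage sketch, assuming `ae` is an instance of the class above and `x_train` is a (num_samples, input_dim) array; in the variational case the loss is already attached via add_loss(), so fit() takes no explicit targets:

ae.prepare_models()
if ae.variational:
    ae.autoencoder.fit(x_train, epochs=10, batch_size=128)
else:
    ae.autoencoder.fit(x_train, x_train, epochs=10, batch_size=128)
z = ae._encoder.predict(x_train[:5])   # map samples to the latent space
x_rec = ae._decoder.predict(z)         # reconstruct from latent codes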
Example #4
    def set_optimizer(self, optimizer_name, lr):
        """Select the optimizer

        Parameters
        ------
        optimizer_name: 
            name of the optimizer, either adam, sgd, rmsprop, adagrad, adadelta
        lr: fload
            learning rate
            
        Raises
        ------
        Exception
        """

        if optimizer_name == 'adam':
            optimizer = Adam(lr=lr,
                             beta_1=0.9,
                             beta_2=0.999,
                             epsilon=None,
                             decay=0.0,
                             amsgrad=False)
        elif optimizer_name == 'sgd':
            optimizer = SGD(lr=lr, momentum=0.0, decay=0.0, nesterov=False)
        elif optimizer_name == 'rmsprop':
            optimizer = RMSprop(lr=lr, rho=0.9, epsilon=None, decay=0.0)
        elif optimizer_name == 'adagrad':
            optimizer = Adagrad(lr=lr, epsilon=None, decay=0.0)
        elif optimizer_name == 'adadelta':
            optimizer = Adadelta(lr=lr, rho=0.95, epsilon=None, decay=0.0)
        else:
            raise Exception('Optimizer unknown')

        return optimizer
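
A brief usage sketch (the `self.model` attribute is hypothetical, not part of the snippet):

        optimizer = self.set_optimizer('adadelta', 1.0)
        self.model.compile(loss='categorical_crossentropy',
                           optimizer=optimizer,
                           metrics=['accuracy'])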
Example #5
    def compile_model(self, optimizer_name='SGD', lr=None):

        # todo: add kwargs to this method
        # options to try with tuning
        optimizer_sgd = SGD(learning_rate=1e-5,
                            momentum=0.0,
                            nesterov=False,
                            name='SGD')  # default learning = 0.01
        optimizer_adam = Adam(learning_rate=1e-5,
                              beta_1=0.9,
                              beta_2=0.999,
                              epsilon=1e-07,
                              amsgrad=False,
                              name='Adam')  # 0.001
        optimizer_adadelta = Adadelta(
            learning_rate=lr, rho=0.95, epsilon=1e-07,
            name='Adadelta')  # if lr is None, will the default be used?
        '''
        loss_function = tf.keras.losses.CategoricalCrossentropy( \
                        from_logits=True, label_smoothing=0, reduction=losses_utils.ReductionV2.AUTO,
                        name='categorical_crossentropy')
        '''

        self.model.compile(
            loss='sparse_categorical_crossentropy',
            # Adadelta adapts learning rates based on a moving window of gradient
            # updates instead of accumulating all past gradients, so it keeps
            # learning even after many updates have been done.
            optimizer='Adadelta',
            metrics=['accuracy'])  # we might prefer F1 or precision here instead

        self.model.summary()
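
Note that compile() receives the string 'Adadelta', so the three optimizer objects configured above are never used and Keras falls back to Adadelta's defaults. A sketch of passing the configured object instead:

        # hedged sketch: use the object so learning_rate/rho actually apply
        self.model.compile(loss='sparse_categorical_crossentropy',
                           optimizer=optimizer_adadelta,
                           metrics=['accuracy'])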
Example #6
    def get_model(self):
        self.vocabulary_size = self.vectorizer.get_vocabulary_size()
        self.embedding_matrix = self.vectorizer.get_embedding_matrix()

        embedding = Embedding(self.vocabulary_size,
                              self.embedding_size,
                              mask_zero=False,
                              trainable=True,
                              weights=None if self.embedding_matrix is None
                              else [self.embedding_matrix])

        self.question_input, self.question_output = self.get_question_output(
            embedding)
        self.sentence_model = self.get_sentence_model(
            embedding,
            question_input=self.question_input,
            question_output=self.question_output,
            use_attention=True)

        self.section_model = self.get_section_model(
            self.sentence_model,
            question_input=self.question_input,
            question_output=self.question_output)
        self.document_model = self.get_document_model(self.section_model,
                                                      self.question_output)

        optimizer = Adadelta()

        loss_metrics = "binary_crossentropy"

        self.document_model.compile(loss=loss_metrics,
                                    optimizer=optimizer,
                                    metrics=[loss_metrics])
        self.document_model.summary()
Example #7
def create_model(batch_size, dropout=0.0, recurrent_state_dropout=0.0):
    model = Sequential()
    # model.add(LSTM(128,
    #                return_sequences=True,
    #                batch_input_shape=(batch_size, 3197, 1),
    #                dropout=dropout,
    #                recurrent_dropout=recurrent_state_dropout,
    #                stateful=True))

    model.add(
        LSTM(10,
             return_sequences=True,
             dropout=dropout,
             batch_input_shape=(batch_size, 3197, 1),
             recurrent_dropout=recurrent_state_dropout,
             stateful=True))

    model.add(Flatten())

    model.add(Dense(1))

    ada = Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0)

    model.compile(loss='binary_crossentropy', optimizer=ada, metrics=[])
    return model
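
A hedged training sketch: with stateful=True the batch size is baked into the input shape, so the sample count must be a multiple of it, and the recurrent state is reset manually between epochs (`x_train` of shape (n, 3197, 1) and `y_train` of shape (n, 1) are assumed):

model = create_model(batch_size=32)
for epoch in range(10):
    # shuffle=False preserves the ordering that the stateful LSTM relies on
    model.fit(x_train, y_train, batch_size=32, epochs=1, shuffle=False)
    model.reset_states()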
Example #8
    def __init__(self, height, width):
        self.row = height
        self.column = width

        # Regularization term
        self.reg = 1e-4

        self.pi = None
        self.v = None

        states = Input(shape=(self.row, self.column, 2))
        print('Initializing model:')

        conv = self.conv_block(states)
        print('conv:', conv.shape)

        res = self.res_block(conv, Config.res_blocks)
        print('res', res.shape)

        self.pi = self.policy_head(res)
        print('pi', self.pi.shape)

        self.v = self.value_head(res)
        print('v', self.v.shape)

        self.model = Model(inputs=states, outputs=[self.pi, self.v])

        self.model.compile(optimizer=Adadelta(),
                           loss=[categorical_crossentropy, mean_squared_error],
                           loss_weights=[0.5, 0.5],
                           metrics=["accuracy"])
Example #9
    def _get_model(self):
        """Initializes keras sequential neural network model"""

        self.model = Sequential()
        self.model.add(ConvLSTM2D(filters=32,
                                  kernel_size=(5, 5),
                                  input_shape=(self.seq_len,
                                               self.processor.lat_len,
                                               self.processor.lon_len,
                                               self.processor.channel),
                                  data_format='channels_last',
                                  padding='same',
                                  return_sequences=True))
        self.model.add(MaxPool3D(pool_size=(1, 4, 4),
                                 padding='valid',
                                 data_format='channels_last'))
        self.model.add(ConvLSTM2D(filters=16,
                                  kernel_size=(3, 3),
                                  data_format='channels_last',
                                  padding='same',
                                  return_sequences=True))
        self.model.add(MaxPool3D(pool_size=(1, 4, 4),
                                 padding='valid',
                                 data_format='channels_last'))
        self.model.add(Flatten())
        self.model.add(Dense(256))
        self.model.add(Dropout(0.2))
        self.model.add(Dense(16))
        self.model.add(Dropout(0.2))
        self.model.add(Dense(len(self.processor.target_levels)))

        loss = binary_crossentropy
        opt = Adadelta()
        mets = categorical_accuracy
        self.model.compile(loss=loss, optimizer=opt, metrics=[mets])
Example #10
def DefineModel():

    # Here we build the network model.
    # This model is made of multiple parts. The first handles the
    # inputs and identifies common features. The rest are branches with
    # each determining an output parameter from those features.
    inputs = Input(shape=(NINPUTS, 1), name='waveform')
    pedmodel = DefinePedModel(inputs)
    timmodel = DefineTimeModel(inputs)
    ampmodel = DefineAmplitudeModel(inputs, pedmodel, timmodel)
    #commonoutput = DefineCommonOutput([pedmodel,ampmodel,timmodel])

    model = Model(inputs=inputs, outputs=[pedmodel, ampmodel, timmodel])
    #model          = Model(inputs=inputs, outputs=commonoutput)

    #loss_weights = {'ped_output':1.0/1.0, 'amp_output':1.0/200.0, 'time_output':1.0/40.0}
    loss_weights = {
        'ped_output': 1.0 / 5.0,
        'amp_output': 1.0 / 200.0,
        'time_output': 1.0 / 40.0
    }

    # Compile the model, possibly using multiple GPUs
    #opt = Adam(0.001)
    #opt = Adamax(0.0005)
    #opt = Adadelta(learning_rate=0.01, rho=0.98, clipnorm=1.0)
    opt = Adadelta(learning_rate=0.01, rho=0.98)
    #opt = SGD()
    model.compile(loss='mse',
                  loss_weights=loss_weights,
                  optimizer=opt,
                  metrics=['mae', 'mse'])

    return model
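
The loss_weights rescale the three heads, whose raw errors live on very different scales, onto comparable footing. A toy illustration of the weighted total (the per-head MSE values are made up):

loss_weights = {'ped_output': 1.0 / 5.0, 'amp_output': 1.0 / 200.0,
                'time_output': 1.0 / 40.0}
per_head_mse = {'ped_output': 5.0, 'amp_output': 200.0, 'time_output': 40.0}
total = sum(loss_weights[k] * per_head_mse[k] for k in loss_weights)  # -> 3.0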
Example #11
def init_model(input_shape):
    """
    Returns the built and compiled Keras model
    :param input_shape: tuple(num_examples, num_frequency_bins, num_time_frames, num_channels)
    :return: keras model
    """
    num_classes = len(cf.dataset.classes)

    # Construct model
    model = Sequential(name='spectrum_cnn_3')
    model.add(
        Conv2D(32,
               kernel_size=(3, 3),
               activation='relu',
               input_shape=input_shape))
    model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(
        Conv2D(128, kernel_size=(3, 3), strides=(2, 1), activation='relu'))
    model.add(
        Conv2D(256, kernel_size=(3, 3), strides=(2, 1), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    # Compile model
    model.compile(loss=categorical_crossentropy,
                  optimizer=Adadelta(learning_rate=1.0),
                  metrics=['accuracy'])

    return model
Example #12
def get_model(class_names):
    model_base = tensorflow.keras.applications.xception.Xception(
        include_top=False, input_shape=(71, 71, 3), weights='imagenet')
    output = Flatten()(model_base.output)

    output = BatchNormalization()(output)
    output = Dropout(0.5)(output)
    output = Dense(128, activation='relu')(output)
    output = BatchNormalization()(output)
    output = Dropout(0.5)(output)
    output = Dense(len(class_names), activation='softmax')(output)
    model = Model(model_base.input, output)
    for layer in model_base.layers:
        layer.trainable = True
    model.summary(line_length=200)
    import pydot
    pydot.find_graphviz = lambda: True  # pretend Graphviz is available so plot_model does not bail out
    from tensorflow.keras.utils import plot_model
    plot_model(model,
               show_shapes=True,
               to_file='C:/CAR/LOG/model_pdfs/{}.pdf'.format('Xception'))
    ada = Adadelta(lr=0.1, rho=0.95, epsilon=1e-08)
    model.compile(optimizer=ada,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
Example #13
def model_v2(input_shape=(16, 16, 16, 3), classQTY=10, rate=0.05):

    input_layer = Input(input_shape)

    X = Conv3D(filters=8, kernel_size=(3, 3, 3),
               activation='relu')(input_layer)
    X = Conv3D(filters=16, kernel_size=(3, 3, 3), activation='relu')(X)
    X = MaxPool3D(pool_size=(2, 2, 2))(X)

    X = Conv3D(filters=24, kernel_size=(3, 3, 3), activation='relu')(X)
    X = Conv3D(filters=32, kernel_size=(3, 3, 3), activation='relu')(X)
    X = MaxPool3D(pool_size=(2, 2, 2))(X)
    X = Flatten()(X)

    X = Dense(units=2048, activation='relu')(X)
    X = Dropout(0.4)(X)
    X = Dense(units=512, activation='relu')(X)
    X = Dropout(0.4)(X)
    output_layer = Dense(units=classQTY, activation='softmax')(X)

    model = Model(inputs=input_layer, outputs=output_layer, name='3DCNN_v2')
    #opt = SGD(lr=0.005, momentum=0.9)
    #model.compile( optimizer= opt, loss='categorical_crossentropy', metrics=['accuracy'])
    model.compile(optimizer=Adadelta(lr=0.05),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
Example #14
def create_model(input_shape):
    """Create a convolutional neural network and an image data generator.
    Build and compile a sequential CNN for handwritten digits recognition. The returned CNN is ready for training.
    Also create an image data generator to train the model on.
    """
    # Model building
    model = Sequential([
        layers.Conv2D(32,
                      kernel_size=(3, 3),
                      activation="relu",
                      input_shape=input_shape),
        layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Dropout(0.25),
        layers.Flatten(),
        layers.Dense(128, activation="relu"),
        layers.Dropout(0.5),
        layers.Dense(10, activation="softmax")
    ])
    # Model compilation
    model.compile(loss=categorical_crossentropy,
                  optimizer=Adadelta(),
                  metrics=['accuracy'])
    # Image data generator
    datagen = ImageDataGenerator(rotation_range=10,
                                 width_shift_range=0.1,
                                 height_shift_range=0.1,
                                 shear_range=0.2,
                                 zoom_range=0.1,
                                 fill_mode='nearest')
    return (model, datagen)
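
A hedged usage sketch with MNIST-style data: `x_train` shaped (n, 28, 28, 1) and scaled to [0, 1], `y_train` one-hot over the 10 classes the final softmax expects:

model, datagen = create_model(input_shape=(28, 28, 1))
model.fit(datagen.flow(x_train, y_train, batch_size=128),
          steps_per_epoch=len(x_train) // 128,
          epochs=10)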
Example #15
    def init_nn(self, input_size, hidden_layers_dim):
        """Initializes the sequential neural model by adding layers and compiling the model.
        There is no call to fit(), because the eligibilities need to be applied to the gradients
        before the gradients can be used to update the model weights. This is done in split-gd."""
        # Adagrad is well suited to sparse data; Adadelta is an extension that
        # solves its problem of a shrinking learning rate.
        opt = Adadelta(learning_rate=self.learning_rate)
        # Larger errors should be penalized more than smaller ones.
        loss = MeanSquaredError()
        model = KER.models.Sequential()
        # The input layer expects a one-dimensional array with input_size
        # elements; adding it builds the network automatically.
        model.add(KER.layers.Dense(input_size,
                                   activation="relu",
                                   input_shape=(input_size,)))
        for i in range(len(hidden_layers_dim)):
            # relu gives quick convergence
            model.add(KER.layers.Dense(hidden_layers_dim[i], activation="relu"))
        # Observation: no activation function gives quicker convergence
        # (could use linear).
        model.add(KER.layers.Dense(1))
        # MSE is one of the most preferred metrics for regression tasks.
        model.compile(optimizer=opt, loss=loss, metrics=["mean_squared_error"])
        # model.summary()
        return model
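Example #16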
def multitask_attention_model(output_size,
                              pos_vocab_size,
                              lex_vocab_size,
                              config_params,
                              visualize=False,
                              plot=False):
    hidden_size = int(config_params['hidden_size'])
    batch_size = int(config_params['batch_size'])
    embedding_size = 768
    max_seq_len = 512

    in_id = Input(shape=(max_seq_len, ), name="input_ids")
    # named in_bert_mask so the candidate-synsets mask defined below
    # does not shadow this BERT attention-mask input
    in_bert_mask = Input(shape=(max_seq_len, ), name="input_masks")
    in_segment = Input(shape=(max_seq_len, ), name="segment_ids")
    bert_inputs = [in_id, in_bert_mask, in_segment]

    bert_output_ = BertEmbeddingLayer(n_fine_tune_layers=3,
                                      pooling="mean")(bert_inputs)
    bert_output = Reshape((max_seq_len, embedding_size))(bert_output_)

    in_mask = Input(shape=(None, output_size),
                    batch_size=batch_size,
                    name='Candidate_Synsets_Mask')
    bert_inputs.append(in_mask)

    bilstm = Bidirectional(LSTM(hidden_size,
                                dropout=0.2,
                                recurrent_dropout=0.2,
                                return_sequences=True,
                                input_shape=(None, None, embedding_size)),
                           merge_mode='sum')(bert_output)

    attention = SeqSelfAttention(units=128,
                                 attention_activation='sigmoid',
                                 name='Attention')(bilstm)

    logits = TimeDistributed(Dense(output_size))(attention)
    logits_mask = Add()([logits, in_mask])

    pos_logits = TimeDistributed(Dense(pos_vocab_size),
                                 name='POS_logits')(attention)
    lex_logits = TimeDistributed(Dense(lex_vocab_size),
                                 name='LEX_logits')(attention)

    wsd_output = Softmax(name="WSD_output")(logits_mask)
    pos_output = Softmax(name="POS_output")(pos_logits)
    lex_output = Softmax(name="LEX_output")(lex_logits)

    model = Model(inputs=bert_inputs,
                  outputs=[wsd_output, pos_output, lex_output],
                  name='Bert_BiLSTM_ATT_MultiTask')

    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=Adadelta(),
                  metrics=['acc'])

    visualize_plot_mdl(visualize, plot, model)

    return model
Example #17
    def __init__(self, emdim, max_passage_length=None, max_query_length=None, num_highway_layers=2, num_decoders=1,
                 encoder_dropout=0, decoder_dropout=0):
        self.emdim = emdim
        self.max_passage_length = max_passage_length
        self.max_query_length = max_query_length

        passage_input = Input(shape=(self.max_passage_length, emdim), dtype='float32', name="passage_input")
        question_input = Input(shape=(self.max_query_length, emdim), dtype='float32', name="question_input")

        question_embedding = question_input
        passage_embedding = passage_input
        for i in range(num_highway_layers):
            highway_layer = Highway(name='highway_{}'.format(i))
            question_layer = TimeDistributed(highway_layer, name=highway_layer.name + "_qtd")
            question_embedding = question_layer(question_embedding)
            passage_layer = TimeDistributed(highway_layer, name=highway_layer.name + "_ptd")
            passage_embedding = passage_layer(passage_embedding)

        encoder_layer = Bidirectional(LSTM(emdim, recurrent_dropout=encoder_dropout,
                                           return_sequences=True), name='bidirectional_encoder')
        encoded_question = encoder_layer(question_embedding)
        encoded_passage = encoder_layer(passage_embedding)

        similarity_matrix = Similarity(name='similarity_layer')([encoded_passage, encoded_question])

        context_to_query_attention = C2QAttention(name='context_to_query_attention')([
            similarity_matrix, encoded_question])
        query_to_context_attention = Q2CAttention(name='query_to_context_attention')([
            similarity_matrix, encoded_passage])

        merged_context = MergedContext(name='merged_context')(
            [encoded_passage, context_to_query_attention, query_to_context_attention])

        modeled_passage = merged_context
        for i in range(num_decoders):
            hidden_layer = Bidirectional(LSTM(emdim, recurrent_dropout=decoder_dropout,
                                              return_sequences=True), name='bidirectional_decoder_{}'.format(i))
            modeled_passage = hidden_layer(modeled_passage)

        span_begin_probabilities = SpanBegin(name='span_begin')([merged_context, modeled_passage])
        span_end_probabilities = SpanEnd(name='span_end')(
            [encoded_passage, merged_context, modeled_passage, span_begin_probabilities])

        output = CombineOutputs(name='combine_outputs')([span_begin_probabilities, span_end_probabilities])

        model = Model([passage_input, question_input], [output])

        model.summary()

        try:
            model = ModelMGPU(model)
        except Exception:
            pass  # fall back to the single-GPU model

        adadelta = Adadelta(lr=0.01)
        model.compile(loss=negative_avg_log_error, optimizer=adadelta, metrics=[accuracy])

        self.model = model
Example #18
    def compile(self, **kwargs):
        # Pull optimizer/loss out of kwargs, falling back to defaults;
        # anything left in kwargs passes through to Keras compile().
        optimizer = kwargs.pop('optimizer', Adadelta())
        loss = kwargs.pop('loss', 'mean_squared_error')
        self.model.compile(optimizer=optimizer, loss=loss, **kwargs)
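
A hedged usage sketch (`net` stands for an instance of the class above; Adam assumed imported alongside Adadelta):

net.compile()                  # defaults: Adadelta + mean_squared_error
net.compile(optimizer=Adam(),  # explicit overrides; any remaining kwargs
            loss='mae',        # pass straight through to Keras compile()
            metrics=['mse'])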
Example #19
    def _build_dqn_model(_env_st_size,
                         _env_ac_size,
                         _learning_rate,
                         _layers,
                         _optimizer,
                         _initializer,
                         _name=None):
        """
        Builds a deep neural net which predicts the Q values for all possible
        actions given a state. The input should have the shape of the state, and
        the output should have the same shape as the action space since we want
        1 Q value per possible action.
        :return: Q network
        """

        state_size = _env_st_size
        action_size = _env_ac_size

        n_network = Sequential(name=_name)
        # build with no. of layers given
        n_network.add(
            Dense(_layers[0],
                  input_dim=state_size,
                  activation='relu',
                  kernel_initializer=_initializer))
        for l in range(1, len(_layers)):
            n_network.add(
                Dense(_layers[l],
                      activation='relu',
                      kernel_initializer=_initializer))

        # output layer with fixed (action_size) output size.
        n_network.add(
            Dense(action_size,
                  activation='linear',
                  kernel_initializer=_initializer))

        if _optimizer is "RMSprop":
            n_network.compile(loss=custom_loss,
                              optimizer=RMSprop(lr=_learning_rate),
                              metrics=['mae'])
        elif _optimizer is "SGD":
            n_network.compile(loss=custom_loss,
                              optimizer=SGD(lr=_learning_rate),
                              metrics=['mae'])
        elif _optimizer is "Adam":
            n_network.compile(loss=custom_loss,
                              optimizer=Adam(lr=_learning_rate),
                              metrics=['mae'])
        elif _optimizer is "Adadelta":
            n_network.compile(loss=custom_loss,
                              optimizer=Adadelta(lr=_learning_rate),
                              metrics=['mae'])

        return n_network
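Example #20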
def attention_model(vocabulary_size, config_params,
                    output_size, pos_vocab_size,
                    lex_vocab_size, visualize=False,
                    plot=False, tokenizer=None):
    hidden_size = int(config_params['hidden_size'])
    batch_size = int(config_params['batch_size'])

    input_type = 'string' if tokenizer is not None else None
    in_sentences = Input(shape=(None,), dtype=input_type,
                         batch_size=batch_size)

    if tokenizer is not None:
        embedding = ElmoEmbeddingLayer()(in_sentences)
        embedding_size = 1024
    else:
        embedding_size = int(config_params['embedding_size'])
        embedding = Embedding(input_dim=vocabulary_size,
                              output_dim=embedding_size,
                              mask_zero=True,
                              name="Embeddings")(in_sentences)

    bilstm = Bidirectional(LSTM(hidden_size, dropout=0.2,
                                recurrent_dropout=0.2,
                                return_sequences=True,
                                input_shape=(None, None, embedding_size)
                                ),
                           merge_mode='sum')(embedding)

    attention = SeqSelfAttention(attention_activation='sigmoid',
                                 name='Attention')(bilstm)

    logits = TimeDistributed(Dense(output_size))(attention)
    in_mask = Input(shape=(None, output_size), batch_size=batch_size,
                    name='Candidate_Synsets_Mask')
    logits_mask = Add()([logits, in_mask])

    pos_logits = TimeDistributed(Dense(pos_vocab_size),
                                 name='POS_logits')(attention)
    lex_logits = TimeDistributed(Dense(lex_vocab_size),
                                 name='LEX_logits')(attention)

    wsd_output = Softmax(name="WSD_output")(logits_mask)
    pos_output = Softmax(name="POS_output")(pos_logits)
    lex_output = Softmax(name="LEX_output")(lex_logits)

    model = Model(inputs=[in_sentences, in_mask],
                  outputs=[wsd_output, pos_output, lex_output],
                  name='BiLSTM_ATT_MultiTask')

    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=Adadelta(), metrics=['acc'])

    visualize_plot_mdl(visualize, plot, model)

    return model
Example #21
    def compile(self, gamma=0.1, loss=['mse'], *args, **kwargs):
        #optimizer = Adam(lr=0.01)
        optimizer = Adadelta(lr=0.1)
        clustering_loss = [self.ss_loss()]  # ['kld']
        #optimizer='adadelta'
        self._model.compile(
            loss=clustering_loss + loss *
            (len(self._model.outputs) - 1),  # capture multi-output models
            loss_weights=[gamma] +
            [1. for _ in range(len(self._model.outputs) - 1)],
            optimizer=optimizer)
Example #22
    def compile_model(self, verbose=1):

        self.model.compile(loss='categorical_crossentropy',
                           optimizer=Adadelta(),
                           metrics=['accuracy'])
        self.model.fit(self.X_train,
                       self.y_train,
                       batch_size=self.batch_size,
                       epochs=self.epochs,
                       verbose=verbose,
                       validation_data=(self.X_test, self.y_test))
Example #23
def baseline_model(vocabulary_size,
                   config_params,
                   output_size,
                   tokenizer=None,
                   visualize=False,
                   plot=False):
    name = 'Baseline'
    hidden_size = int(config_params['hidden_size'])
    batch_size = int(config_params['batch_size'])

    input_type = 'string' if tokenizer is not None else None
    in_sentences = Input(shape=(None, ),
                         dtype=input_type,
                         batch_size=batch_size,
                         name='Input')

    if tokenizer is not None:
        embedding = ElmoEmbeddingLayer()(in_sentences)
        embedding_size = 1024
        name = f'Elmo_{name}'
    else:
        embedding_size = int(config_params['embedding_size'])
        embedding = Embedding(input_dim=vocabulary_size,
                              output_dim=embedding_size,
                              mask_zero=True,
                              name="Embeddings")(in_sentences)

    bilstm = Bidirectional(LSTM(hidden_size,
                                dropout=0.2,
                                recurrent_dropout=0.2,
                                return_sequences=True,
                                input_shape=(None, None, embedding_size)),
                           merge_mode='sum')(embedding)

    logits = TimeDistributed(Dense(output_size))(bilstm)

    in_mask = Input(shape=(None, output_size),
                    batch_size=batch_size,
                    name='Candidate_Synsets_Mask')

    logits_mask = Add()([logits, in_mask])
    output = Softmax()(logits_mask)

    model = Model(inputs=[in_sentences, in_mask], outputs=output, name=name)

    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=Adadelta(),
                  metrics=['acc'])

    visualize_plot_mdl(visualize, plot, model)

    return model
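Example #24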
def attention_model(vocabulary_size, config_params,
                    output_size, weights=None,
                    tokenizer=None, visualize=False, plot=False):
    hidden_size = int(config_params['hidden_size'])
    batch_size = int(config_params['batch_size'])

    input_type = 'string' if tokenizer is not None else None
    in_sentences = Input(shape=(None,), dtype=input_type,
                         batch_size=batch_size)
    in_mask = Input(shape=(None, output_size), batch_size=batch_size,
                    name='Candidate_Synsets_Mask')

    if tokenizer is not None:
        embedding = ElmoEmbeddingLayer()(in_sentences)
        embedding_size = 1024
    elif weights is not None:
        embedding_size = weights.shape[1]
        train = False  # To fine-tune pretrained embeddings or not
        embedding = Embedding(input_dim=output_size, output_dim=embedding_size,
                              weights=[weights], trainable=train,
                              mask_zero=True)(in_sentences)
    else:
        embedding_size = int(config_params['embedding_size'])
        embedding = Embedding(input_dim=vocabulary_size,
                              output_dim=embedding_size,
                              mask_zero=True,
                              name="Embeddings")(in_sentences)

    bilstm = Bidirectional(LSTM(hidden_size, dropout=0.2,
                                recurrent_dropout=0.2,
                                return_sequences=True,
                                input_shape=(None, None, embedding_size)
                                ),
                           merge_mode='sum')(embedding)

    attention = SeqSelfAttention(attention_activation='sigmoid',
                                 name='Attention')(bilstm)

    logits = TimeDistributed(Dense(output_size))(attention)
    logits_mask = Add()([logits, in_mask])

    output = Softmax()(logits_mask)

    model = Model(inputs=[in_sentences, in_mask],
                  outputs=output, name="SensEmbed_Attention")

    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=Adadelta(), metrics=['acc'])

    visualize_plot_mdl(visualize, plot, model)

    return model
Example #25
    def __init__(self, output_dir, key):

        # Variables to hold the description of the experiment
        self.config_description = "This is the template config file."

        # System dependent variable
        self._workers = 1
        self._multiprocessing = False
        self._gpus = 1
        self._displayer = MNISTDisplayer()

        # Variables for comet.ml
        self._project_name = "my_project"
        self._workspace = "my_workspace"
        self.output_dir = join(output_dir, "{}_{}_{}".format(self.workspace, self.project_name, key))

        # Network variables
        self.num_classes = 10
        self.img_size = (28, 28)
        self._weights = None
        self._network = MNISTExample(self.num_classes)

        # Training variables
        self._epochs = 5
        self._batch_size = 128
        self._steps_per_epoch = 60000 // 128
        self._optimizer = Adadelta()
        self._loss = categorical_crossentropy
        self._metrics = ['accuracy']

        self._callbacks = []

        self.early_stopping_params = {"monitor":'val_loss', "min_delta":0, "patience":7}
        self.reduce_lr_on_plateau_params = {"monitor":'val_loss', "factor":0.1, "patience":5}

        self.tensorboard = TensorBoard(join(self.output_dir, "checkpoints/logs"))
        self.terminate_on_nan = TerminateOnNaN()
        self.early_stopping = EarlyStopping(**self.early_stopping_params)
        self.reduce_lr_on_plateau = ReduceLROnPlateau(**self.reduce_lr_on_plateau_params)
        self.model_checkpoint = ModelCheckpoint(filepath=join(self.output_dir, "checkpoints", "cp-{epoch:04d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.ckpt"), verbose=1, save_best_only=True, save_weights_only=True)

        self._callbacks = [self.tensorboard, self.terminate_on_nan, self.early_stopping, self.reduce_lr_on_plateau, self.model_checkpoint]

        # Creating the training and validation generator (you may want to move these to the prepare functions)
        train_data, validation_data = mnist.load_data()
        self._train_generator = MNISTGenerator(train_data, self.batch_size)
        self._validation_generator = MNISTGenerator(validation_data, self.batch_size)
        # Dummy test for example
        self._test_generator = MNISTGenerator(validation_data, self.batch_size)

        self._evaluator = None
        self._displayer = MNISTDisplayer()
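Example #26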
    def get_optimizer(self) -> Optimizer:
        """
        Returns the configured optimizer for this configuration
        :return:
        """
        if self.optimizer == "SGD":
            return SGD(lr=self.learning_rate, momentum=self.nesterov_momentum, nesterov=True)
        if self.optimizer == "Adam":
            return Adam()
        if self.optimizer == "Adadelta":
            return Adadelta()

        raise Exception("Invalid optimizer {0} requested".format(self.optimizer))
Example #27
def get_optimizer(optim="adam", learning_rate=1e-3):
    if optim == "adam":
        return Adam(learning_rate=learning_rate)
    elif optim == "adagrad":
        return Adagrad(learning_rate=learning_rate)
    elif optim == "sgd":
        return SGD(learning_rate=learning_rate)
    elif optim == "rmsprop":
        return RMSprop(learning_rate=learning_rate)
    elif optim == "adadelta":
        return Adadelta(learning_rate=learning_rate)
    else:
        logger.error(f"Invalid optim {optim}")
        os._exit(0)
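
A brief usage sketch (the `model` is hypothetical):

opt = get_optimizer(optim="adadelta", learning_rate=1.0)
model.compile(optimizer=opt, loss="categorical_crossentropy",
              metrics=["accuracy"])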
Example #28
def train_dense_model(x, y):
    model = Sequential()
    model.add(Dense(64, activation='relu', input_shape=(128, )))
    model.add(Dropout(0.5))
    model.add(Dense(32, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_categories, activation='softmax'))

    model.compile(Adadelta(),
                  loss=categorical_crossentropy,
                  metrics=['accuracy'])

    model.fit(x, y, batch_size=batch_size, epochs=epochs)

    return model
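
num_categories, batch_size and epochs are free variables above, read from the enclosing module. A hedged, self-contained usage sketch with made-up values and random data:

import numpy as np

num_categories, batch_size, epochs = 10, 32, 5   # assumed module-level globals
x = np.random.random((320, 128)).astype('float32')
y = np.eye(num_categories)[np.random.randint(0, num_categories, size=320)]
model = train_dense_model(x, y)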
Example #29
    def get_optimizer(optimizer):
        if optimizer == "sgd":
            return SGD(learning_rate=0.01,
                       decay=1e-6,
                       momentum=0.9,
                       nesterov=True,
                       clipnorm=5)
        if optimizer == "rmsprop":
            return RMSprop(learning_rate=0.01)
        if optimizer == "adam":
            return Adam(learning_rate=0.01)
        if optimizer == "adagrad":
            return Adagrad(learning_rate=0.01)
        if optimizer == "adadelta":
            return Adadelta(learning_rate=1.0)
        raise ValueError("Unknown optimizer: {}".format(optimizer))
Example #30
    def get_model(self):

        embedding = Embedding(5000, 300, mask_zero=True, trainable=True)

        self.question_model = self.get_text_model(embedding)
        self.sentence_model = self.get_text_model(embedding, use_attention=True)

        self.section_model = self.get_section_model(self.sentence_model, self.question_model)
        self.document_model = self.get_document_model(self.section_model, self.question_model)

        optimizer = Adadelta()

        loss_metrics = "binary_crossentropy"

        self.document_model.compile(loss=loss_metrics, optimizer=optimizer, metrics=[loss_metrics])
        self.document_model.summary()