Example #1
    def prepare_models(self, optimizer=None, loss=None):

        self.autoencoder = Model(self.input, self.decoded)
        self._encoder = Model(self.input, self.encoded)
        self.z_input = Input(shape=(self.latent_dim,))
        self.x_output = self.z_input
        for layer in self.autoencoder.layers[self.encoder_index:]:
            self.x_output = layer(self.x_output)
        
        self._decoder = Model(self.z_input, self.x_output)
        
        if optimizer is None:
            self.optimizer = Adadelta(self.learning_rate)
        else:
            self.optimizer = optimizer
            
        if loss is None:
            self.loss = mse
        else:
            self.loss = loss
            
        if not self.variational:
            self.autoencoder.compile(optimizer=self.optimizer, loss=self.loss)
        else:
            self.reconstruction_loss = self.input_dim*self.loss(K.flatten(self.input), K.flatten(self.decoded))
            self.kl_loss = 1 + self.z_log_var - K.square(self.z_mean) - K.exp(self.z_log_var)
            self.kl_loss = -0.5*K.sum(self.kl_loss, axis=-1)
            self.vae_loss = K.mean(self.reconstruction_loss + self.kl_loss)
            self.autoencoder.add_loss(self.vae_loss)
            self.autoencoder.compile(optimizer=self.optimizer)
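
A minimal usage sketch for prepare_models above, assuming a hypothetical Autoencoder class that exposes this method and the attributes it builds; constructor arguments and training data are illustrative:

    from tensorflow.keras.optimizers import Adam

    ae = Autoencoder(input_dim=784, latent_dim=32, variational=False)  # hypothetical constructor
    ae.prepare_models(optimizer=Adam(1e-3))         # loss falls back to mse when omitted
    ae.autoencoder.fit(x_train, x_train,            # x_train assumed to be the training images
                       epochs=10, batch_size=128)
    codes = ae._encoder.predict(x_train)            # latent representations from the encoder
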
Example #2
    def __init__(self, height, width):
        self.row = height
        self.column = width

        # Regularization term
        self.reg = 1e-4

        self.pi = None
        self.v = None

        states = Input(shape=(self.row, self.column, 2))
        print('Initializing model:')

        conv = self.conv_block(states)
        print('conv:', conv.shape)

        res = self.res_block(conv, Config.res_blocks)
        print('res', res.shape)

        self.pi = self.policy_head(res)
        print('pi', self.pi.shape)

        self.v = self.value_head(res)
        print('v', self.v.shape)

        self.model = Model(inputs=states, outputs=[self.pi, self.v])

        self.model.compile(optimizer=Adadelta(),
                           loss=[categorical_crossentropy, mean_squared_error],
                           loss_weights=[0.5, 0.5],
                           metrics=["accuracy"])
Example #3
 def init_nn(self, input_size, hidden_layers_dim):
     """Initializes the neural sequential model by adding layers and compiling the model.
     There is no call to fit(), because the eligibilities need to be applied to the gradients
      before the gradients can be used to update the model weights. This is done in split-gd"""
     opt = Adadelta(
         learning_rate=self.learning_rate
     )  # Adagrad is well-suited to sparse data; Adadelta is an extension that addresses its shrinking learning rate
     loss = MeanSquaredError(
     )  # Larger errors should be penalized more than smaller ones
     model = KER.models.Sequential()
     model.add(
         KER.layers.Dense(input_size,
                          activation="relu",
                          input_shape=(input_size, ))
     )  # the input layer expects a one-dimensional array of input_size elements; this builds the network automatically
     for i in range(len(hidden_layers_dim)):
         model.add(KER.layers.Dense(
             hidden_layers_dim[i],
             activation="relu"))  # relu gives quick convergence
     model.add(
         KER.layers.Dense(1)
     )  # Observation: no activation function gives quicker convergence (could use linear)
     model.compile(optimizer=opt, loss=loss, metrics=[
         "mean_squared_error"
     ])  # MSE is one of the most commonly used metrics for regression tasks
     # model.summary()
     return model
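
The docstring above defers fit() so that eligibility traces can be applied to the gradients first; a hedged sketch of what that manual update step might look like (features, targets and eligibilities are placeholders, not from the original code):

    import tensorflow as tf

    model = critic.init_nn(input_size=4, hidden_layers_dim=[16, 16])    # critic is hypothetical
    with tf.GradientTape() as tape:
        preds = model(features)                                         # batch of states
        loss = tf.keras.losses.MeanSquaredError()(targets, preds)
    grads = tape.gradient(loss, model.trainable_weights)
    scaled = [g * e for g, e in zip(grads, eligibilities)]              # eligibility-weighted gradients
    model.optimizer.apply_gradients(zip(scaled, model.trainable_weights))
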
def multitask_attention_model(output_size,
                              pos_vocab_size,
                              lex_vocab_size,
                              config_params,
                              visualize=False,
                              plot=False):
    hidden_size = int(config_params['hidden_size'])
    batch_size = int(config_params['batch_size'])
    embedding_size = 768
    max_seq_len = 512

    in_id = Input(shape=(max_seq_len, ), name="input_ids")
    in_mask = Input(shape=(max_seq_len, ), name="input_masks")
    in_segment = Input(shape=(max_seq_len, ), name="segment_ids")
    bert_inputs = [in_id, in_mask, in_segment]

    bert_output_ = BertEmbeddingLayer(n_fine_tune_layers=3,
                                      pooling="mean")(bert_inputs)
    bert_output = Reshape((max_seq_len, embedding_size))(bert_output_)

    in_mask = Input(shape=(None, output_size),
                    batch_size=batch_size,
                    name='Candidate_Synsets_Mask')
    bert_inputs.append(in_mask)

    bilstm = Bidirectional(LSTM(hidden_size,
                                dropout=0.2,
                                recurrent_dropout=0.2,
                                return_sequences=True,
                                input_shape=(None, None, embedding_size)),
                           merge_mode='sum')(bert_output)

    attention = SeqSelfAttention(units=128,
                                 attention_activation='sigmoid',
                                 name='Attention')(bilstm)

    logits = TimeDistributed(Dense(output_size))(attention)
    logits_mask = Add()([logits, in_mask])

    pos_logits = TimeDistributed(Dense(pos_vocab_size),
                                 name='POS_logits')(attention)
    lex_logits = TimeDistributed(Dense(lex_vocab_size),
                                 name='LEX_logits')(attention)

    wsd_output = Softmax(name="WSD_output")(logits_mask)
    pos_output = Softmax(name="POS_output")(pos_logits)
    lex_output = Softmax(name="LEX_output")(lex_logits)

    model = Model(inputs=bert_inputs,
                  outputs=[wsd_output, pos_output, lex_output],
                  name='Bert_BiLSTM_ATT_MultiTask')

    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=Adadelta(),
                  metrics=['acc'])

    visualize_plot_mdl(visualize, plot, model)

    return model
Example #5
    def __init__(self, emdim, max_passage_length=None, max_query_length=None, num_highway_layers=2, num_decoders=1,
                 encoder_dropout=0, decoder_dropout=0):
        self.emdim = emdim
        self.max_passage_length = max_passage_length
        self.max_query_length = max_query_length

        passage_input = Input(shape=(self.max_passage_length, emdim), dtype='float32', name="passage_input")
        question_input = Input(shape=(self.max_query_length, emdim), dtype='float32', name="question_input")

        question_embedding = question_input
        passage_embedding = passage_input
        for i in range(num_highway_layers):
            highway_layer = Highway(name='highway_{}'.format(i))
            question_layer = TimeDistributed(highway_layer, name=highway_layer.name + "_qtd")
            question_embedding = question_layer(question_embedding)
            passage_layer = TimeDistributed(highway_layer, name=highway_layer.name + "_ptd")
            passage_embedding = passage_layer(passage_embedding)

        encoder_layer = Bidirectional(LSTM(emdim, recurrent_dropout=encoder_dropout,
                                           return_sequences=True), name='bidirectional_encoder')
        encoded_question = encoder_layer(question_embedding)
        encoded_passage = encoder_layer(passage_embedding)

        similarity_matrix = Similarity(name='similarity_layer')([encoded_passage, encoded_question])

        context_to_query_attention = C2QAttention(name='context_to_query_attention')([
            similarity_matrix, encoded_question])
        query_to_context_attention = Q2CAttention(name='query_to_context_attention')([
            similarity_matrix, encoded_passage])

        merged_context = MergedContext(name='merged_context')(
            [encoded_passage, context_to_query_attention, query_to_context_attention])

        modeled_passage = merged_context
        for i in range(num_decoders):
            hidden_layer = Bidirectional(LSTM(emdim, recurrent_dropout=decoder_dropout,
                                              return_sequences=True), name='bidirectional_decoder_{}'.format(i))
            modeled_passage = hidden_layer(modeled_passage)

        span_begin_probabilities = SpanBegin(name='span_begin')([merged_context, modeled_passage])
        span_end_probabilities = SpanEnd(name='span_end')(
            [encoded_passage, merged_context, modeled_passage, span_begin_probabilities])

        output = CombineOutputs(name='combine_outputs')([span_begin_probabilities, span_end_probabilities])

        model = Model([passage_input, question_input], [output])

        model.summary()

        try:
            model = ModelMGPU(model)
        except Exception:
            pass

        adadelta = Adadelta(lr=0.01)
        model.compile(loss=negative_avg_log_error, optimizer=adadelta, metrics=[accuracy])

        self.model = model
Example #6
 def compile(self, **kwargs):
     optimizer = Adadelta()
     loss = 'mean_squared_error'
     if 'optimizer' in kwargs:
         optimizer = kwargs.get('optimizer')
         del kwargs['optimizer']
     if 'loss' in kwargs:
         loss = kwargs.get('loss')
         del kwargs['loss']
     self.model.compile(optimizer=optimizer, loss=loss, **kwargs)
def attention_model(vocabulary_size, config_params,
                    output_size, pos_vocab_size,
                    lex_vocab_size, visualize=False,
                    plot=False, tokenizer=None):
    hidden_size = int(config_params['hidden_size'])
    batch_size = int(config_params['batch_size'])

    input_type = 'string' if tokenizer is not None else None
    in_sentences = Input(shape=(None,), dtype=input_type,
                         batch_size=batch_size)

    if tokenizer is not None:
        embedding = ElmoEmbeddingLayer()(in_sentences)
        embedding_size = 1024
    else:
        embedding_size = int(config_params['embedding_size'])
        embedding = Embedding(input_dim=vocabulary_size,
                              output_dim=embedding_size,
                              mask_zero=True,
                              name="Embeddings")(in_sentences)

    bilstm = Bidirectional(LSTM(hidden_size, dropout=0.2,
                                recurrent_dropout=0.2,
                                return_sequences=True,
                                input_shape=(None, None, embedding_size)
                                ),
                           merge_mode='sum')(embedding)

    attention = SeqSelfAttention(attention_activation='sigmoid',
                                 name='Attention')(bilstm)

    logits = TimeDistributed(Dense(output_size))(attention)
    in_mask = Input(shape=(None, output_size), batch_size=batch_size,
                    name='Candidate_Synsets_Mask')
    logits_mask = Add()([logits, in_mask])

    pos_logits = TimeDistributed(Dense(pos_vocab_size),
                                 name='POS_logits')(attention)
    lex_logits = TimeDistributed(Dense(lex_vocab_size),
                                 name='LEX_logits')(attention)

    wsd_output = Softmax(name="WSD_output")(logits_mask)
    pos_output = Softmax(name="POS_output")(pos_logits)
    lex_output = Softmax(name="LEX_output")(lex_logits)

    model = Model(inputs=[in_sentences, in_mask],
                  outputs=[wsd_output, pos_output, lex_output],
                  name='BiLSTM_ATT_MultiTask')

    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=Adadelta(), metrics=['acc'])

    visualize_plot_mdl(visualize, plot, model)

    return model
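
A hedged construction sketch for the function above; the config_params keys follow the ones the function reads, while the vocabulary and output sizes are purely illustrative:

    config_params = {'hidden_size': 256, 'batch_size': 32, 'embedding_size': 128}
    model = attention_model(vocabulary_size=30000, config_params=config_params,
                            output_size=25000, pos_vocab_size=20, lex_vocab_size=45)
    model.summary()   # three softmax heads: WSD_output, POS_output, LEX_output
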
Example #8
    def _build_dqn_model(_env_st_size,
                         _env_ac_size,
                         _learning_rate,
                         _layers,
                         _optimizer,
                         _initializer,
                         _name=None):
        """
        Builds a deep neural net which predicts the Q values for all possible
        actions given a state. The input should have the shape of the state, and
        the output should have the same shape as the action space since we want
        1 Q value per possible action.
        :return: Q network
        """

        state_size = _env_st_size
        action_size = _env_ac_size

        n_network = Sequential(name=_name)
        # build with no. of layers given
        n_network.add(
            Dense(_layers[0],
                  input_dim=state_size,
                  activation='relu',
                  kernel_initializer=_initializer))
        for l in range(1, len(_layers)):
            n_network.add(
                Dense(_layers[l],
                      activation='relu',
                      kernel_initializer=_initializer))

        # output layer with fixed (action_size) output size.
        n_network.add(
            Dense(action_size,
                  activation='linear',
                  kernel_initializer=_initializer))

        if _optimizer is "RMSprop":
            n_network.compile(loss=custom_loss,
                              optimizer=RMSprop(lr=_learning_rate),
                              metrics=['mae'])
        elif _optimizer is "SGD":
            n_network.compile(loss=custom_loss,
                              optimizer=SGD(lr=_learning_rate),
                              metrics=['mae'])
        elif _optimizer is "Adam":
            n_network.compile(loss=custom_loss,
                              optimizer=Adam(lr=_learning_rate),
                              metrics=['mae'])
        elif _optimizer is "Adadelta":
            n_network.compile(loss=custom_loss,
                              optimizer=Adadelta(lr=_learning_rate),
                              metrics=['mae'])

        return n_network
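
A hedged sketch of greedy action selection with the Q-network above; it assumes custom_loss is defined in the enclosing module, treats _build_dqn_model as callable in isolation, and uses illustrative sizes:

    import numpy as np

    q_net = _build_dqn_model(_env_st_size=4, _env_ac_size=2, _learning_rate=1e-3,
                             _layers=[32, 32], _optimizer="Adam",
                             _initializer="he_uniform", _name="dqn")
    state = np.zeros((1, 4))                                   # single observation, batch axis first
    action = int(np.argmax(q_net.predict(state), axis=1)[0])   # index of the highest predicted Q value
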
Example #9
 def compile(self, gamma=0.1, loss=['mse'], *args, **kwargs):
     #optimizer = Adam(lr=0.01)
     optimizer = Adadelta(lr=0.1)
     clustering_loss = [self.ss_loss()]  # ['kld']
     #optimizer='adadelta'
     self._model.compile(
         loss=clustering_loss + loss * (len(self._model.outputs) - 1),  # capture multioutput models
         loss_weights=[gamma] + [1. for _ in range(len(self._model.outputs) - 1)],
         optimizer=optimizer)
Example #10
    def compile_model(self, verbose=1):

        self.model.compile(loss='categorical_crossentropy',
                           optimizer=Adadelta(),
                           metrics=['accuracy'])
        self.model.fit(self.X_train,
                       self.y_train,
                       batch_size=self.batch_size,
                       epochs=self.epochs,
                       verbose=verbose,
                       validation_data=(self.X_test, self.y_test))
def baseline_model(vocabulary_size,
                   config_params,
                   output_size,
                   tokenizer=None,
                   visualize=False,
                   plot=False):
    name = 'Baseline'
    hidden_size = int(config_params['hidden_size'])
    batch_size = int(config_params['batch_size'])

    input_type = 'string' if tokenizer is not None else None
    in_sentences = Input(shape=(None, ),
                         dtype=input_type,
                         batch_size=batch_size,
                         name='Input')

    if tokenizer is not None:
        embedding = ElmoEmbeddingLayer()(in_sentences)
        embedding_size = 1024
        name = f'Elmo_{name}'
    else:
        embedding_size = int(config_params['embedding_size'])
        embedding = Embedding(input_dim=vocabulary_size,
                              output_dim=embedding_size,
                              mask_zero=True,
                              name="Embeddings")(in_sentences)

    bilstm = Bidirectional(LSTM(hidden_size,
                                dropout=0.2,
                                recurrent_dropout=0.2,
                                return_sequences=True,
                                input_shape=(None, None, embedding_size)),
                           merge_mode='sum')(embedding)

    logits = TimeDistributed(Dense(output_size))(bilstm)

    in_mask = Input(shape=(None, output_size),
                    batch_size=batch_size,
                    name='Candidate_Synsets_Mask')

    logits_mask = Add()([logits, in_mask])
    output = Softmax()(logits_mask)

    model = Model(inputs=[in_sentences, in_mask], outputs=output, name=name)

    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=Adadelta(),
                  metrics=['acc'])

    visualize_plot_mdl(visualize, plot, model)

    return model
def attention_model(vocabulary_size, config_params,
                    output_size, weights=None,
                    tokenizer=None, visualize=False, plot=False):
    hidden_size = int(config_params['hidden_size'])
    batch_size = int(config_params['batch_size'])

    input_type = 'string' if tokenizer is not None else None
    in_sentences = Input(shape=(None,), dtype=input_type,
                         batch_size=batch_size)
    in_mask = Input(shape=(None, output_size), batch_size=batch_size,
                    name='Candidate_Synsets_Mask')

    if tokenizer is not None:
        embedding = ElmoEmbeddingLayer()(in_sentences)
        embedding_size = 1024
    elif weights is not None:
        embedding_size = weights.shape[1]
        train = False  # To fine-tune pretrained embeddings or not
        embedding = Embedding(input_dim=output_size, output_dim=embedding_size,
                              weights=[weights], trainable=train,
                              mask_zero=True)(in_sentences)
    else:
        embedding_size = int(config_params['embedding_size'])
        embedding = Embedding(input_dim=vocabulary_size,
                              output_dim=embedding_size,
                              mask_zero=True,
                              name="Embeddings")(in_sentences)

    bilstm = Bidirectional(LSTM(hidden_size, dropout=0.2,
                                recurrent_dropout=0.2,
                                return_sequences=True,
                                input_shape=(None, None, embedding_size)
                                ),
                           merge_mode='sum')(embedding)

    attention = SeqSelfAttention(attention_activation='sigmoid',
                                 name='Attention')(bilstm)

    logits = TimeDistributed(Dense(output_size))(attention)
    logits_mask = Add()([logits, in_mask])

    output = Softmax()(logits_mask)

    model = Model(inputs=[in_sentences, in_mask],
                  outputs=output, name="SensEmbed_Attention")

    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=Adadelta(), metrics=['acc'])

    visualize_plot_mdl(visualize, plot, model)

    return model
Example #13
    def __init__(self, output_dir, key):

        # Variables to hold the description of the experiment
        self.config_description = "This is the template config file."

        # System dependent variable
        self._workers = 1
        self._multiprocessing = False
        self._gpus = 1
        self._displayer = MNISTDisplayer()

        # Variables for comet.ml
        self._project_name = "my_project"
        self._workspace = "my_workspace"
        self.output_dir = join(output_dir, "{}_{}_{}".format(self.workspace, self.project_name, key))

        # Network variables
        self.num_classes = 10
        self.img_size = (28, 28)
        self._weights = None
        self._network = MNISTExample(self.num_classes)

        # Training variables
        self._epochs = 5
        self._batch_size = 128
        self._steps_per_epoch = 60000 // 128
        self._optimizer = Adadelta()
        self._loss = categorical_crossentropy
        self._metrics = ['accuracy']

        self._callbacks = []

        self.early_stopping_params = {"monitor":'val_loss', "min_delta":0, "patience":7}
        self.reduce_lr_on_plateau_params = {"monitor":'val_loss', "factor":0.1, "patience":5}

        self.tensorboard = TensorBoard(join(self.output_dir, "checkpoints/logs"))
        self.terminate_on_nan = TerminateOnNaN()
        self.early_stopping = EarlyStopping(**self.early_stopping_params)
        self.reduce_lr_on_plateau = ReduceLROnPlateau(**self.reduce_lr_on_plateau_params)
        self.model_checkpoint = ModelCheckpoint(filepath=join(self.output_dir, "checkpoints", "cp-{epoch:04d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.ckpt"), verbose=1, save_best_only=True, save_weights_only=True)

        self._callbacks = [self.tensorboard, self.terminate_on_nan, self.early_stopping, self.reduce_lr_on_plateau, self.model_checkpoint]

        # Creating the training and validation generator (you may want to move these to the prepare functions)
        train_data, validation_data = mnist.load_data()
        self._train_generator = MNISTGenerator(train_data, self.batch_size)
        self._validation_generator = MNISTGenerator(validation_data, self.batch_size)
        # Dummy test for example
        self._test_generator = MNISTGenerator(validation_data, self.batch_size)

        self._evaluator = None
        self._displayer = MNISTDisplayer()
    def get_optimizer(self) -> Optimizer:
        """
        Returns the configured optimizer for this configuration
        :return:
        """
        if self.optimizer == "SGD":
            return SGD(lr=self.learning_rate, momentum=self.nesterov_momentum, nesterov=True)
        if self.optimizer == "Adam":
            return Adam()
        if self.optimizer == "Adadelta":
            return Adadelta()

        raise Exception("Invalid optimizer {0} requested".format(self.optimizer))
Example #15
def get_optimizer(optim="adam", learning_rate=1e-3):
    if optim == "adam":
        return Adam(learning_rate=learning_rate)
    elif optim == "adagrad":
        return Adagrad(learning_rate=learning_rate)
    elif optim == "sgd":
        return SGD(learning_rate=learning_rate)
    elif optim == "rmsprop":
        return RMSprop(learning_rate=learning_rate)
    elif optim == "adadelta":
        return Adadelta(learning_rate=learning_rate)
    else:
        logger.error(f"Invalid optim {optim}")
        os._exit(0)
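
A short usage sketch, assuming a Keras model named model built elsewhere (illustrative):

    opt = get_optimizer(optim="adadelta", learning_rate=1e-3)
    model.compile(optimizer=opt, loss="mse", metrics=["mae"])
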
Example #16
 def get_optimizer(optimizer):
     if optimizer == "sdg":
         return SGD(learning_rate=0.01,
                    decay=1e-6,
                    momentum=0.9,
                    nesterov=True,
                    clipnorm=5)
     if optimizer == "rmsprop":
         return RMSprop(learning_rate=0.01)
     if optimizer == "adam":
         return Adam(learning_rate=0.01)
     if optimizer == "adagrad":
         return Adagrad(learning_rate=0.01)
     if optimizer == "adadelta":
         return Adadelta(learning_rate=1.0)
Example #17
def train_dense_model(x, y):
    model = Sequential()
    model.add(Dense(64, activation='relu', input_shape=(128, )))
    model.add(Dropout(0.5))
    model.add(Dense(32, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_categories, activation='softmax'))

    model.compile(Adadelta(),
                  loss=categorical_crossentropy,
                  metrics=['accuracy'])

    model.fit(x, y, batch_size=batch_size, epochs=epochs)

    return model
def train_model(X_train, y_train, window_size, num_layers):
    """
    This function trains the model with the training data.
    :param X_train: the training samples
    :param y_train: the training values
    :param window_size: the sliding window size
    :param num_layers: the number of hidden layers
    :return: a trained model
    """
    model = build_nn_model(num_layers, 3 * window_size)
    model.compile(loss='mean_squared_error',
                  optimizer=Adadelta(learning_rate=0.005,
                                     rho=0.9999,
                                     epsilon=1e-10))
    model.fit(X_train, y_train, epochs=100, verbose=0)
    return model
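
A hedged call sketch for train_model; build_nn_model is assumed to come from the surrounding project, and the array shapes below are only illustrative:

    import numpy as np

    window_size = 10
    X_train = np.random.rand(200, 3 * window_size)   # samples flattened over the sliding window
    y_train = np.random.rand(200)
    model = train_model(X_train, y_train, window_size=window_size, num_layers=2)
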
Example #19
 def select_optimizer(self, opt_type, learning_rate, clipnorm = 0.5):
     if opt_type == 'adam':
         return Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
     elif opt_type == 'rmsprop':
         return RMSprop(lr=learning_rate, rho=0.9, epsilon=None, decay=0.0)
     elif opt_type == 'adagrad':
         return Adagrad(lr=learning_rate, epsilon=None, decay=0.0)
     elif opt_type == 'adadelta':
         return Adadelta(lr=learning_rate, rho=0.95, epsilon=None, decay=0.0)    
     elif opt_type == 'nadam':
         return Nadam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=None, schedule_decay=0.004)
     elif opt_type == 'sgd':
         return SGD(lr = learning_rate, momentum = 0.0, decay = 0.0, nesterov = False, clipnorm = clipnorm)
     else:
         print('No optimizer')
         quit()
Example #20
    def get_model(self):

        embedding = Embedding(5000, 300, mask_zero=True, trainable=True)

        self.question_model = self.get_text_model(embedding)
        self.sentence_model = self.get_text_model(embedding, use_attention=True)

        self.section_model = self.get_section_model(self.sentence_model, self.question_model)
        self.document_model = self.get_document_model(self.section_model, self.question_model)

        optimizer = Adadelta()

        loss_metrics = "binary_crossentropy"

        self.document_model.compile(loss=loss_metrics, optimizer=optimizer, metrics=[loss_metrics])
        self.document_model.summary()
Example #21
def create_model_3(input_shape, num_classes):
    global image_shape
    image_shape = (331, 331, 3)

    model = tf.keras.applications.NASNetMobile(
        include_top=True,
        weights=None,
        input_tensor=None,
        input_shape=input_shape,
        pooling=None,
        classes=num_classes,
        # classifier_activation="softmax",
    )
    optimizer = Adadelta(learning_rate=learning_rate)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model
Example #22
    def define_discriminator(self, alpha=0.2, dropout=0.2):
        """Define the standalone discriminator model."""
        from tensorflow.keras.models import Sequential  # pylint: disable=E0611,E0401
        from tensorflow.keras.optimizers import Adadelta  # pylint: disable=E0611,E0401
        from tensorflow.keras.layers import Dense, Conv2D, Dropout, Reshape, LeakyReLU, Flatten  # pylint: disable=E0611,E0401

        model = Sequential()
        model.add(Dense(200, use_bias=False, input_dim=self.nqubits))
        model.add(Reshape((10, 10, 2)))
        model.add(
            Conv2D(64,
                   kernel_size=3,
                   strides=1,
                   padding='same',
                   kernel_initializer='glorot_normal'))
        model.add(LeakyReLU(alpha=alpha))
        model.add(
            Conv2D(32,
                   kernel_size=3,
                   strides=1,
                   padding='same',
                   kernel_initializer='glorot_normal'))
        model.add(LeakyReLU(alpha=alpha))
        model.add(
            Conv2D(16,
                   kernel_size=3,
                   strides=1,
                   padding='same',
                   kernel_initializer='glorot_normal'))
        model.add(LeakyReLU(alpha=alpha))
        model.add(
            Conv2D(8,
                   kernel_size=3,
                   strides=1,
                   padding='same',
                   kernel_initializer='glorot_normal'))
        model.add(Flatten())
        model.add(LeakyReLU(alpha=alpha))
        model.add(Dropout(dropout))
        model.add(Dense(1, activation='sigmoid'))

        # compile model
        opt = Adadelta(learning_rate=0.1)
        model.compile(loss='binary_crossentropy',
                      optimizer=opt,
                      metrics=['accuracy'])
        return model
def attention_model(output_size,
                    max_seq_len,
                    config_params,
                    visualize=False,
                    plot=False):
    embedding_size = 768
    hidden_size = int(config_params['hidden_size'])
    batch_size = int(config_params['batch_size'])

    in_id = Input(shape=(max_seq_len, ), name="input_ids")
    in_mask = Input(shape=(max_seq_len, ), name="input_masks")
    in_segment = Input(shape=(max_seq_len, ), name="segment_ids")
    bert_inputs = [in_id, in_mask, in_segment]

    bert_output_ = BertEmbeddingLayer(n_fine_tune_layers=3,
                                      pooling="mean")(bert_inputs)
    bert_output = Reshape((max_seq_len, embedding_size))(bert_output_)

    bilstm = Bidirectional(
        LSTM(hidden_size,
             dropout=0.2,
             recurrent_dropout=0.2,
             return_sequences=True))(bert_output)
    attention = SeqSelfAttention(attention_activation='sigmoid',
                                 name='Attention')(bilstm)

    logits = TimeDistributed(Dense(output_size))(attention)

    in_mask = Input(shape=(None, output_size),
                    batch_size=batch_size,
                    name='Candidate_Synsets_Mask')
    bert_inputs.append(in_mask)

    logits_mask = Add()([logits, in_mask])
    output = Softmax()(logits_mask)

    mdl = Model(inputs=bert_inputs,
                outputs=output,
                name="Bert_Attention_BiLSTM")

    mdl.compile(loss="sparse_categorical_crossentropy",
                optimizer=Adadelta(),
                metrics=['acc'])

    visualize_plot_mdl(visualize, plot, mdl)

    return mdl
Example #24
    def _optimization(self):
        space = {
            'lstm_layers': hp.choice('lstm_layers', (1, 5)),
            'dense_layers': hp.choice('dense_layers', (1, 5)),
            'base_neurons': hp.choice('base_neurons', (64, 128, 256)),
            'optimizer': hp.choice('optimizer',
                                   (RMSprop(), Adam(), Adadelta())),
            'activation': hp.choice('activation', ('relu', 'tanh')),
            'learning_rate': hp.uniform('learning_rate', 0.01, 0.5)
        }

        construction_dict = fmin(fn=self._compile_and_fit,
                                 space=space,
                                 algo=tpe.suggest,
                                 max_evals=50)

        return construction_dict
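
One caveat worth noting: fmin reports hp.choice parameters as option indices rather than the chosen values, so a hedged follow-up sketch (assuming access to the same space dict) would map them back with hyperopt's space_eval:

    from hyperopt import space_eval

    best_params = space_eval(space, construction_dict)   # turns choice indices back into concrete values
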
Example #25
def buildSepcvn():
    model = models.Sequential()
    
    model.add(SeparableConv2D(32,(3,3),padding='same',input_shape=(256,256,1), activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D())
    model.add(Dropout(0.25))
    
    model.add(SeparableConv2D(64,(3,3),padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(SeparableConv2D(64,(3,3),padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D())
    model.add(Dropout(0.25))
    
    model.add(SeparableConv2D(128,(3,3),padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(SeparableConv2D(128,(3,3),padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(SeparableConv2D(128,(3,3),padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D())
    model.add(Dropout(0.25))
    
    model.add(GlobalAveragePooling2D())
    model.add(Flatten())
#     model.add(Dense(128, activation='relu'))
#     model.add(BatchNormalization())
    model.add(Dropout(0.5))
    
#     model.add(Dense(512, activation='relu'))
#     model.add(BatchNormalization())
#     model.add(Dropout(0.5))
    
#     model.add(Dense(128, activation='relu'))
#     model.add(BatchNormalization())
#     model.add(Dropout(0.5))
    model.add(Dense(1,activation='sigmoid'))
    
    model.compile(loss='binary_crossentropy',
    optimizer=Adadelta(lr=1e-2),
    metrics=['accuracy'])
    
    model.summary()
    
    return model
Example #26
def mlp():
    model = Sequential()
    model.add(Dense(5, activation='relu', kernel_constraint=NonNeg()))
    model.add(Dropout(0.5))

    model.add(Dense(10, activation='relu', kernel_constraint=NonNeg()))
    model.add(Dropout(0.5))

    model.add(Dense(20, activation='relu', kernel_constraint=NonNeg()))
    model.add(Dropout(0.5))
    model.add(Dense(10, activation='relu', kernel_constraint=NonNeg()))
    model.add(Dropout(0.5))
    model.add(Dense(7, activation='linear', kernel_constraint=NonNeg()))

    opt = Adadelta(lr=0.001)
    model.compile(loss='mae', optimizer=opt,metrics=["accuracy"])
    return model
Example #27
 def test_allowed_slot_names(self):
     opt_and_slots_pairs = [
         (SGD(), []),
         (SGD(momentum=0.2), ["momentum"]),
         (Adam(), ["m", "v"]),
         (Adam(amsgrad=True), ["m", "v", "vhat"]),
         (Adamax(), ["m", "v"]),
         (Nadam(), ["m", "v"]),
         (Adadelta(), ["accum_grad", "accum_var"]),
         (Adagrad(), ["accumulator"]),
         (Ftrl(), ["accumulator", "linear"]),
         (RMSprop(), ["rms"]),
         (RMSprop(momentum=0.2), ["rms", "momentum"]),
         (RMSprop(centered=True), ["rms", "mg"]),
         (RMSprop(momentum=0.2, centered=True), ["rms", "momentum", "mg"]),
     ]
     for opt, expected_slots in opt_and_slots_pairs:
         self._compare_slot_names(opt, expected_slots)
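
A hedged stand-alone sketch of the same idea: with the TF2 OptimizerV2 API (tf.keras.optimizers.legacy on newer releases), Adadelta creates exactly these two slot variables per parameter after one update:

    import tensorflow as tf

    opt = tf.keras.optimizers.Adadelta()        # use tf.keras.optimizers.legacy.Adadelta on TF >= 2.11
    var = tf.Variable([1.0, 2.0])
    opt.apply_gradients([(tf.constant([0.1, 0.1]), var)])
    print(opt.get_slot_names())                 # expected: ['accum_grad', 'accum_var']
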
Example #28
def create_model_W2V(embedding_size, sentence_size, vocab_size, conv_n_filters, conv_filter_sizes, conv_window_size,
                 pool_window_size, hidden_layer_size, dropout_size, n_classes, embedding_weights=None, static=False,
                 rand=False, multichannel=False):

    inp = Input(shape=(sentence_size,))
    if multichannel:
        emb1 = Embedding(vocab_size, embedding_size, weights=[
                         embedding_weights], trainable=False)(inp)
        emb1 = Reshape((sentence_size, embedding_size, 1))(emb1)
        emb2 = Embedding(vocab_size, embedding_size, weights=[
                         embedding_weights], trainable=True)(inp)
        emb2 = Reshape((sentence_size, embedding_size, 1))(emb2)
        x = Concatenate()([emb1, emb2])
    else:
        if rand:
            emb = Embedding(vocab_size, embedding_size)(inp)
        elif static:
            emb = Embedding(vocab_size, embedding_size, weights=[
                            embedding_weights], trainable=False)(inp)
        else:
            emb = Embedding(vocab_size, embedding_size, weights=[
                            embedding_weights], trainable=True)(inp)
        x = Reshape((sentence_size, embedding_size, 1))(emb)

    convolution_layer = []
    for filter_size in conv_filter_sizes:
        conv_window_size = (filter_size, embedding_size)
        conv = Conv2D(conv_n_filters, conv_window_size,
                      activation='relu', use_bias=True, padding='valid')(x)
        convolution_layer.append(GlobalMaxPooling2D()(conv))

    x = Concatenate()(convolution_layer)
    x = Dropout(dropout_size)(x)
    x = Dense(n_classes, activation='softmax',
              kernel_constraint=max_norm(3))(x)

    model = Model(inputs=inp, outputs=x)

    model.compile(optimizer=Adadelta(),
                  loss='categorical_crossentropy', 
                  metrics=['accuracy'])

    return model
Example #29
def createModel(data_dim):
    model = Sequential()
    print(data_dim)
    model.add(GRU(15, input_shape=(data_dim[1], 1)))
    model.add(Dropout(0.375))
    # model.add(GRU(7))
    # model.add(Dropout(0.25))
    model.add(Dense(5, activation='relu'))
    model.summary()
    model.compile(
        optimizer=Adadelta(),
        loss=[  #"mean_squared_error",
            #'sparse_categorical_crossenropy',
            #"mean_squared_error",
            #"mean_squared_error",
            "mean_squared_error"
        ],
        metrics=['accuracy'])
    return model
    def create_optimizer(self, optimizer=None):
        if not optimizer:
            raise ValueError

        learning_rate = self.config.train.learning_rate
        decay = self.config.train.decay
        momentum = self.config.train.momentum
        if optimizer == "adam":
            self.optimizer = Adam(lr=learning_rate, decay=decay)
        elif optimizer == "sgd":
            self.optimizer = SGD(lr=learning_rate,
                                 decay=decay,
                                 momentum=momentum)
        elif optimizer == "adadelta":
            self.optimizer = Adadelta(lr=learning_rate, decay=decay)
        else:
            raise ValueError

        self.optimizer_name = optimizer