Example #1
    def build_model(self):
        real_value_input = Input(shape=(self.field_dim[0], ))
        discrete_input = Input(shape=(self.field_dim[1], ))
        embeddings = Embedding(
            self.feature_dim + 1,
            self.embedding_size,
            embeddings_initializer=truncated_normal(stddev=self.init_std),
            embeddings_regularizer=l2(self.reg),
            mask_zero=True,
            trainable=True)(discrete_input)
        reshape = _Reshape(target_shape=(-1, ))(embeddings)
        features = Concatenate(axis=1)([real_value_input, reshape])
        dense_network_out = features
        for each in self.hidden_size:
            dense_network_out = Dense(
                each,
                activation='relu',
                kernel_initializer=truncated_normal(stddev=self.init_std),
                kernel_regularizer=l2(self.reg))(dense_network_out)

        cross_network_out = CrossLayer(self.input_dim, self.cross_layer_num,
                                       self.reg)(features)
        # concat width: self.hidden_size[-1] + self.field_dim[0] + self.field_dim[1] * self.embedding_size
        concat = Concatenate(
            axis=1, name='concat')([dense_network_out, cross_network_out])

        output = Dense(
            1,
            activation='sigmoid',
            kernel_initializer=truncated_normal(stddev=self.init_std),
            kernel_regularizer=l2(self.reg))(concat)
        return Model([real_value_input, discrete_input], [output])
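The wiring above is the Deep & Cross pattern: a dense tower and a cross network run in parallel over the same concatenated features and are merged before the sigmoid head. A minimal self-contained sketch of that wiring (CrossLayer is a custom layer not shown here, so a plain Dense stands in for it; all sizes are illustrative):

from keras.layers import Concatenate, Dense, Input
from keras.models import Model

dense_in = Input(shape=(8, ))
deep = Dense(16, activation='relu')(dense_in)
deep = Dense(16, activation='relu')(deep)
cross = Dense(8)(dense_in)  # stand-in for the custom CrossLayer
out = Dense(1, activation='sigmoid')(Concatenate(axis=1)([deep, cross]))
Model(dense_in, out).summary()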
Example #2
 def build_model(self):
     inputs = Input((self.field_dim, ))
     embeddings = Embedding(self.feature_dim + 1,
                            self.embedding_size,
                            embeddings_initializer=truncated_normal(
                                stddev=self.init_std),
                            embeddings_regularizer=l2(self.reg),
                            mask_zero=False,
                            trainable=True)(inputs)
     z = ZLayer(self.output_dim, self.reg)(embeddings)
     if self.mode == 'outer':
         p = OuterProductLayer(self.output_dim, self.reg)(embeddings)
     else:
         # the original left p as None here, which would crash the
         # Concatenate below for any mode other than 'outer'
         raise NotImplementedError('mode %r is not supported' % self.mode)
     outputs = LeakyReLU(1.0)(features)
     for i in range(len(self.fully_list)):
         if i < len(self.fully_list) - 1:
             outputs = Dropout(self.keep_prob)(Dense(
                 self.fully_list[i],
                 activation='relu',
                 kernel_initializer=truncated_normal(stddev=self.init_std),
                 kernel_regularizer=l2(self.reg))(outputs))
         else:
             outputs = Dense(
                 1,
                 activation='sigmoid',
                 kernel_initializer=truncated_normal(stddev=self.init_std),
                 kernel_regularizer=l2(self.reg))(outputs)
     return Model([inputs], outputs)
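For reference, what the two product modes of a PNN compute, in numpy terms (ZLayer and OuterProductLayer are custom layers not shown here, so this illustrates the idea rather than their exact code):

import numpy as np

emb = np.random.randn(3, 4)                 # 3 fields, embedding_size 4
inner = emb @ emb.T                         # (3, 3) pairwise inner products
outer = np.einsum('ik,jl->ijkl', emb, emb)  # (3, 3, 4, 4) pairwise outer products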
Example #3
 def build(self, input_shape):
     assert len(input_shape) == 2
     self.W_query = self.add_weight(
         shape=(input_shape[0][-1],
                self.num_attention_heads * self.size_per_head),
         name='Wq',
         initializer=initializers.truncated_normal(
             stddev=self.initializer_range))
     self.bias_query = self.add_weight(
         shape=(self.num_attention_heads * self.size_per_head, ),
         name='bq',
         initializer=initializers.get('zeros'))
     self.W_key = self.add_weight(
         shape=(input_shape[1][-1],
                self.num_attention_heads * self.size_per_head),
         name='Wk',
         initializer=initializers.truncated_normal(
             stddev=self.initializer_range))
     self.bias_key = self.add_weight(shape=(self.num_attention_heads *
                                            self.size_per_head, ),
                                     name='bk',
                                     initializer=initializers.get('zeros'))
     self.W_value = self.add_weight(
         shape=(input_shape[1][-1],
                self.num_attention_heads * self.size_per_head),
         name='Wv',
         initializer=initializers.truncated_normal(
             stddev=self.initializer_range))
     self.bias_value = self.add_weight(
         shape=(self.num_attention_heads * self.size_per_head, ),
         name='bv',
         initializer=initializers.get('zeros'))
     super(MultiHeadAttentionLayer, self).build(input_shape)
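A quick shape check for the projections above, with illustrative sizes (not from the source): K.dot(query, self.W_query) maps (batch, seq, hidden) to (batch, seq, num_attention_heads * size_per_head).

import numpy as np

batch, seq, hidden = 2, 10, 32
num_heads, size_per_head = 4, 16
x = np.random.randn(batch, seq, hidden)
W_q = np.random.randn(hidden, num_heads * size_per_head)
q = x @ W_q  # same contraction as K.dot(..., self.W_query)
assert q.shape == (batch, seq, num_heads * size_per_head)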
Example #4
 def infer(X, trainable=True, init=initializers.truncated_normal(stddev=0.01)):
     init_w = init
     init_b = initializers.constant(0.)
     normed = Lambda(lambda x: x / 255., output_shape=K.int_shape(X)[1:])(X)
     h_conv1 = Convolution2D(32, (8, 8), strides=(4, 4),
                             kernel_initializer=init_w, use_bias=False, padding='same')(normed)
     h_ln1 = LayerNormalization(activation=K.relu)(h_conv1)
     h_conv2 = Convolution2D(64, (4, 4), strides=(2, 2),
                             kernel_initializer=init_w, use_bias=False, padding='same')(h_ln1)
     h_ln2 = LayerNormalization(activation=K.relu)(h_conv2)
     h_conv3 = Convolution2D(64, (3, 3), strides=(1, 1),
                             kernel_initializer=init_w, use_bias=False, padding='same')(h_ln2)
     h_ln3 = LayerNormalization(activation=K.relu)(h_conv3)
     h_flat = Flatten()(h_ln3)
     fc_advantage = Dense(512, use_bias=False, kernel_initializer=init_w)(h_flat)
     h_ln_fc_advantage = LayerNormalization(activation=K.relu)(fc_advantage)
     advantage = Dense(NUM_ACTIONS, kernel_initializer=init_w,
                       use_bias=False, bias_initializer=init_b)(h_ln_fc_advantage)
     fc_value = Dense(512, use_bias=False, kernel_initializer=init_w)(h_flat)
     h_ln_fc_value = LayerNormalization(activation=K.relu)(fc_value)
     value = Dense(1, kernel_initializer=init_w, use_bias=False, bias_initializer=init_b)(h_ln_fc_value)
     z = Lambda(lambda x: x[1] + x[0] - K.mean(x[0], axis=1, keepdims=True), output_shape=(NUM_ACTIONS,))([advantage, value])
     # z = LayerNormalization()(fc2)
     model = Model(inputs=X, outputs=z)
     model.trainable = trainable
     return z, model
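The final Lambda implements the dueling-DQN combination Q(s, a) = V(s) + A(s, a) - mean_a A(s, a). A small numeric check of that formula:

import numpy as np

advantage = np.array([[1.0, 2.0, 3.0]])  # (batch, NUM_ACTIONS)
value = np.array([[0.5]])                # (batch, 1)
q = value + advantage - advantage.mean(axis=1, keepdims=True)
print(q)  # [[-0.5  0.5  1.5]]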
Example #5
    def _build_model(self, pretrain_model):
        input_ids = Input(shape=(self.seq_length, ))
        input_mask = Input(shape=(self.seq_length, ))
        inputs = [input_ids, input_mask]
        if self.use_token_type:
            input_token_type_ids = Input(shape=(self.seq_length, ))
            inputs.append(input_token_type_ids)

        self.bert = BertModel(
            self.config,
            batch_size=self.batch_size,
            seq_length=self.seq_length,
            max_predictions_per_seq=self.max_predictions_per_seq,
            use_token_type=self.use_token_type,
            mask=self.mask)
        self.bert_encoder = self.bert.get_bert_encoder()
        self.bert_encoder.load_weights(pretrain_model)
        pooled_output = self.bert_encoder(inputs)
        pooled_output = Dropout(self.config.hidden_dropout_prob)(pooled_output)
        pred = Dense(units=self.num_classes,
                     activation='softmax',
                     kernel_initializer=initializers.truncated_normal(
                         stddev=self.config.initializer_range))(pooled_output)
        model = Model(inputs=inputs, outputs=pred)
        return model
Example #6
 def get_classifer_model(self, num_classes):
     """construct model for classify """
     bert_encoder = Dropout(self.config.hidden_dropout_prob)(self.pooled_output)
     pred = Dense(units=num_classes,
                  activation='softmax',
                  kernel_initializer=initializers.truncated_normal(stddev=self.config.initializer_range),
                  )(bert_encoder)
     self.classifer_model = Model(inputs=self.inputs, outputs=pred)
     return self.classifer_model
Example #7
 def get_next_sentence_model(self):
     """construct next sentence model for pretraining"""
     pooled_output = self.bert_model(self.inputs)
     pred = Dense(units=2,
                  activation='softmax',
                  kernel_initializer=initializers.truncated_normal(stddev=self.config.initializer_range)
                  )(pooled_output)
     self.next_sentence_model = Model(inputs=self.inputs, outputs=pred, name='next_sentence_model')
     return self.next_sentence_model
Example #8
    def build(self, input_shape):
        if self.use_token_type:
            _, seq_length, input_width = input_shape[0]
            self.token_type_table = self.add_weight(
                shape=(self.token_type_vocab_size, input_width),
                initializer=initializers.truncated_normal(
                    stddev=self.initializer_range),
                name='token_type_embeddings')
        else:
            _, seq_length, input_width = input_shape

        if self.use_position_embeddings:
            assert seq_length <= self.max_position_embeddings
            self.full_position_embeddings = self.add_weight(
                shape=(self.max_position_embeddings, input_width),
                initializer=initializers.truncated_normal(
                    stddev=self.initializer_range),
                name='position_embeddings')
        super(Embedding_Postprocessor, self).build(input_shape)
Example #9
def network(categorical_columns_item, num_deep_numeric_feature,
            num_wide_numeric_feature, bias):
    input_layers = list()
    embedding_layers = list()

    # net categorical deep feature
    for col, num in categorical_columns_item.items():
        input_deep_cat_layer = Input(shape=(1, ),
                                     name=col + "_categorical_deep_input")
        embedding_layer = Embedding(
            input_dim=num,
            output_dim=min(10, num // 2),
            embeddings_initializer=truncated_normal(mean=0,
                                                    stddev=1 / np.sqrt(num)),
            input_length=1,
            name=col + "_deep_embedding")(input_deep_cat_layer)
        embedding_layer = (Reshape(target_shape=(min(10, num // 2), ),
                                   name=col +
                                   "_deep_reshape")(embedding_layer))
        embedding_layer = Dropout(rate=0.15,
                                  noise_shape=(None, 1),
                                  name=col + "_deep_dropout")(embedding_layer)
        input_layers.append(input_deep_cat_layer)
        embedding_layers.append(embedding_layer)

    # net numeric deep feature
    input_deep_num_layer = Input(shape=(num_deep_numeric_feature, ),
                                 name="numeric_deep_input")
    input_layers.append(input_deep_num_layer)

    # net numeric wide feature
    input_wide_num_layer = Input(shape=(num_wide_numeric_feature, ),
                                 name="numeric_wide_input")
    input_layers.append(input_wide_num_layer)

    hidden_layer = Dense(units=32,
                         kernel_initializer=lecun_normal(),
                         activation="selu")(Concatenate()([
                             Concatenate()(embedding_layers),
                             Dropout(rate=0.15)(input_deep_num_layer)
                         ]))
    hidden_layer = Dense(units=16,
                         kernel_initializer=lecun_normal(),
                         activation="selu")(hidden_layer)
    hidden_layer = Dense(units=8,
                         kernel_initializer=lecun_normal(),
                         activation="selu")(hidden_layer)
    hidden_layer = Concatenate()([hidden_layer, input_wide_num_layer])
    output_layer = Dense(units=1,
                         kernel_initializer=lecun_normal(),
                         bias_initializer=constant(logit(bias)),
                         activation="sigmoid",
                         name="output_layer")(hidden_layer)

    return Model(input_layers, output_layer)
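Initializing the output bias to logit(bias) makes the sigmoid start out predicting roughly the base rate while the other weights are still near zero. A quick check, assuming logit comes from scipy.special as the call suggests:

from scipy.special import expit, logit

base_rate = 0.03  # illustrative value
assert abs(expit(logit(base_rate)) - base_rate) < 1e-12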
Example #10
    def get_lm_model(self):
        """construct language model for pretraining"""
        config = self.config
        positions_input = Input(shape=(self.max_predictions_per_seq, ),
                                dtype='int32',
                                name='masked_lm_positions')
        cur_inputs = self.inputs + [positions_input]

        sequence_output = Lambda(function=lambda x: gather_indexes(x[0], x[1]),
                                 output_shape=lambda x:
                                 (x[0][0], x[1][1], x[0][2]))(
                                     [self.sequence_output, positions_input])

        sequence_output = Dense(
            units=config.hidden_size,
            activation=get_activation(config.hidden_act),
            kernel_initializer=initializers.truncated_normal(
                stddev=config.initializer_range),
        )(sequence_output)
        sequence_output = BatchNormalization(
            name='layer_norm_lm')(sequence_output)

        sequence_att = Lambda(
            function=lambda x: K.dot(
                x[0], K.permute_dimensions(x[1], pattern=(1, 0))),
            output_shape=lambda x: (x[0][0], x[0][1], x[1][0]),
        )([sequence_output, self.embedding_table])

        class AddBiasSoftmax(Layer):
            def __init__(self, **kwargs):
                self.supports_masking = True
                super(AddBiasSoftmax, self).__init__(**kwargs)

            def build(self, input_shape):
                self.bias = self.add_weight(
                    shape=(input_shape[-1], ),
                    name='output_bias',
                    initializer=initializers.get('zeros'))
                super(AddBiasSoftmax, self).build(input_shape)

            def call(self, inputs, **kwargs):
                output = K.bias_add(inputs, self.bias)
                output = K.softmax(output, axis=-1)
                return output

            def compute_output_shape(self, input_shape):
                return input_shape

        sequence_softmax = AddBiasSoftmax()(sequence_att)

        self.lm_model = Model(inputs=cur_inputs,
                              outputs=sequence_softmax,
                              name='lm_model')
        return self.lm_model
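The sequence_att Lambda ties the output projection to the embedding table: logits are the dot product of the transformed hidden states with the transposed embeddings, as the output_shape lambda (batch, num_predictions, vocab) indicates. A shape sketch with illustrative sizes:

import numpy as np

hidden_size, vocab, preds = 8, 100, 5
seq_out = np.random.randn(2, preds, hidden_size)
emb_table = np.random.randn(vocab, hidden_size)
logits = seq_out @ emb_table.T
assert logits.shape == (2, preds, vocab)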
Example #11
 def get_classifer_model(self):
     bert_encoder = Dropout(self.config.hidden_dropout_prob)(
         self.pooled_output)
     pred = Dense(
         units=2,
         activation='softmax',
         kernel_initializer=initializers.truncated_normal(
             stddev=self.config.initializer_range),
     )(bert_encoder)
     self.classifer_model = Model(inputs=self.inputs, outputs=pred)
     return self.classifer_model
Example #12
def example_network(input_shape):

    im_input = Input(shape=input_shape)

    t = Conv3D(64, (11, 11, 11),
               padding='valid',
               kernel_initializer=initializers.truncated_normal(mean=0,
                                                                stddev=0.001),
               bias_initializer=initializers.constant(0.1))(im_input)
    t = Activation('relu')(t)
    t = MaxPool3D(pool_size=(2, 2, 2), padding='valid')(t)

    t = Conv3D(128, (6, 6, 6),
               padding='valid',
               kernel_initializer=initializers.truncated_normal(mean=0,
                                                                stddev=0.001),
               bias_initializer=initializers.constant(0.1))(t)
    t = Activation('relu')(t)
    t = MaxPool3D(pool_size=(2, 2, 2), padding='valid')(t)

    t = Conv3D(256, (3, 3, 3),
               padding="valid",
               kernel_initializer=initializers.truncated_normal(mean=0,
                                                                stddev=0.001),
               bias_initializer=initializers.constant(0.1))(t)
    t = Activation('relu')(t)

    t = Flatten()(t)

    t = Dense(1000,
              kernel_initializer=initializers.truncated_normal(mean=0,
                                                               stddev=1 /
                                                               np.sqrt(1000)),
              bias_initializer=initializers.constant(1.0))(t)
    t = Activation('relu')(t)
    t = Dropout(0.5)(t)

    t = Dense(500,
              kernel_initializer=initializers.truncated_normal(mean=0,
                                                               stddev=1 /
                                                               np.sqrt(500)),
              bias_initializer=initializers.constant(1.0))(t)
    t = Activation('relu')(t)
    t = Dropout(0.5)(t)

    t = Dense(200,
              kernel_initializer=initializers.truncated_normal(mean=0,
                                                               stddev=1 /
                                                               np.sqrt(200)),
              bias_initializer=initializers.constant(1.0))(t)
    t = Activation('relu')(t)
    t = Dropout(0.5)(t)

    t = Dense(1)(t)
    output = Activation('sigmoid')(t)

    model = Model(inputs=im_input, outputs=output)

    return model
Example #13
 def build(self, input_shape):
     self.input_dim = input_shape[1]
     self.W = []
     self.bias = []
     for i in range(self.num_layer):
         self.W.append(
             self.add_weight(shape=[1, self.input_dim],
                             initializer=truncated_normal(stddev=0.01),
                             regularizer=l2(self.reg),
                             name='w_' + str(i)))
         self.bias.append(
             self.add_weight(shape=[1, self.input_dim],
                             initializer='zeros',
                             name='b_' + str(i)))
     self.built = True
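These per-layer (1, input_dim) weight/bias pairs match the DCN cross update x_{l+1} = x_0 * (x_l . w_l) + b_l + x_l. The layer's call() is not shown in the source, so the numpy sketch below is an assumption about how the weights are applied:

import numpy as np

dim = 4
x0 = np.random.randn(1, dim)
w = np.random.randn(1, dim)
b = np.zeros((1, dim))
x_l = x0
x_l = x0 * (x_l @ w.T) + b + x_l  # one cross step; x_l @ w.T is (1, 1)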
Example #14
def generator_model(im_size, output_channel=3):
    initializer = initializers.truncated_normal(stddev=0.1)
    model = Sequential()
    model.add(
        Dense(input_dim=100, units=512 * 4 * 4,
              kernel_initializer=initializer))
    model.add(Activation('linear'))

    model.add(Reshape((4, 4, 512)))
    model.add(
        Conv2DTranspose(256, (5, 5),
                        strides=(2, 2),
                        padding='same',
                        kernel_initializer=initializer))
    #model.add(BatchNormalization())
    model.add(Activation('tanh'))

    model.add(
        Conv2DTranspose(128, (5, 5),
                        strides=(2, 2),
                        padding='same',
                        kernel_initializer=initializer))
    #model.add(BatchNormalization())
    model.add(Activation('tanh'))

    model.add(
        Conv2DTranspose(64, (5, 5),
                        strides=(2, 2),
                        padding='same',
                        kernel_initializer=initializer))
    # model.add(BatchNormalization())
    model.add(Activation('tanh'))

    model.add(
        Conv2DTranspose(output_channel, (5, 5),
                        strides=(2, 2),
                        padding='same',
                        kernel_initializer=initializer))
    model.add(Activation('tanh'))
    return model
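A hedged usage sketch (batch size and predict call are illustrative): the Reshape to 4x4 followed by four stride-2 transposed convolutions upsamples 4 -> 8 -> 16 -> 32 -> 64, so the output is 64x64 regardless of the unused im_size argument.

import numpy as np

g = generator_model(im_size=64)
noise = np.random.normal(size=(2, 100))
fake = g.predict(noise)
print(fake.shape)  # (2, 64, 64, 3)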
Example #15
def discriminator_model(im_size, input_channel=3):
    initializer = initializers.truncated_normal(stddev=0.1)
    model = Sequential()
    model.add(
        Convolution2D(32, (5, 5),
                      padding='same',
                      input_shape=(im_size, im_size, input_channel),
                      strides=(2, 2),
                      kernel_initializer=initializer))
    model.add(LeakyReLU(0.2))

    model.add(
        Convolution2D(64, (5, 5),
                      padding='same',
                      strides=(2, 2),
                      kernel_initializer=initializer))
    #model.add(BatchNormalization())
    model.add(LeakyReLU(0.2))

    model.add(
        Convolution2D(128, (5, 5),
                      padding='same',
                      strides=(2, 2),
                      kernel_initializer=initializer))
    # model.add(BatchNormalization())
    model.add(LeakyReLU(0.2))

    #model.add(Convolution2D(512,(5, 5), padding='same', strides=(2,2),
    #                        kernel_initializer=initializer))
    #model.add(BatchNormalization())
    #model.add(LeakyReLU(0.2))

    model.add(Flatten())
    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    return model
Example #16
def create_model(window,
                 input_shape,
                 num_actions,
                 init_method,
                 model_name='q_network'):  # noqa: D103
    input_rows, input_cols = input_shape[0], input_shape[1]
    print('Now we start building the model ... ')
    model = Sequential()
    if init_method == 'he':
        model.add(
            Conv2D(16,
                   kernel_size=(8, 8),
                   strides=(4, 4),
                   padding='same',
                   kernel_initializer=initializers.he_normal(),
                   activation='relu',
                   input_shape=(window, input_rows, input_cols)))
        model.add(
            Conv2D(32,
                   kernel_size=(4, 4),
                   strides=(2, 2),
                   padding='same',
                   kernel_initializer=initializers.he_normal(),
                   activation='relu'))
        model.add(Flatten())
        model.add(
            Dense(256,
                  activation='relu',
                  kernel_initializer=initializers.he_normal()))
        model.add(Dense(num_actions, activation='linear'))
    elif init_method == 'default':
        model.add(
            Conv2D(16,
                   kernel_size=(8, 8),
                   strides=(4, 4),
                   padding='same',
                   activation='relu',
                   input_shape=(window, input_rows, input_cols)))
        model.add(
            Conv2D(32,
                   kernel_size=(4, 4),
                   strides=(2, 2),
                   padding='same',
                   activation='relu'))
        model.add(Flatten())
        model.add(Dense(256, activation='relu'))
        model.add(Dense(num_actions, activation='linear'))
    elif init_method == 'normal':
        model.add(
            Conv2D(
                16,
                kernel_size=(8, 8),
                strides=(4, 4),
                padding='same',
                kernel_initializer=initializers.truncated_normal(stddev=0.01),
                activation='relu',
                input_shape=(window, input_rows, input_cols)))
        model.add(
            Conv2D(
                32,
                kernel_size=(4, 4),
                strides=(2, 2),
                padding='same',
                activation='relu',
                kernel_initializer=initializers.truncated_normal(stddev=0.01)))
        model.add(Flatten())
        model.add(
            Dense(256,
                  kernel_initializer=initializers.random_normal(stddev=0.01),
                  activation='relu'))
        model.add(
            Dense(num_actions,
                  kernel_initializer=initializers.random_normal(stddev=0.01),
                  activation='linear'))
    return model
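A hypothetical call (all values illustrative). Note that input_shape=(window, input_rows, input_cols) implies channels_first image data format:

q_net = create_model(window=4, input_shape=(84, 84), num_actions=6,
                     init_method='he')
q_net.summary()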
Example #17
 def call(self, inputs, **kwargs):
     f_sigma = K.sum(inputs, axis=1, keepdims=True)
     p = K.batch_dot(tf.transpose(f_sigma, (0, 2, 1)), f_sigma)
     return Flatten()(Conv1D(self.output_dim, (self.embed_size, ),
                             kernel_initializer=truncated_normal(stddev=0.01),
                             kernel_regularizer=l2(self.reg))(p))
Example #18
 def call(self, inputs, **kwargs):
     return Flatten()(Conv1D(
         self.output_dim, (self.field_dim, ),
         kernel_initializer=truncated_normal(stddev=0.01),
         kernel_regularizer=l2(self.reg))(inputs))
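Because the kernel length equals field_dim, the valid convolution collapses the field axis to length 1, so Flatten() leaves exactly output_dim features. A standalone check with illustrative sizes:

from keras.layers import Conv1D, Flatten, Input
from keras.models import Model

inp = Input(shape=(5, 8))            # field_dim=5, embed_size=8
out = Flatten()(Conv1D(16, 5)(inp))  # valid conv -> length 1 -> 16 features
Model(inp, out).summary()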
Example #19
def basic(type, train, test, code, epoch, batch):

    # Load MNIST train and test data
    X_train = np.loadtxt(train, delimiter=',', dtype=None)
    X_test = np.loadtxt(test, delimiter=',', dtype=None)

    # z_list : define experiment code(Z) size
    z_list = [code]
    autoencoder = [[] for i in range(len(z_list))]

    # E : epoch, BS = batch size
    E = epoch
    BS = batch

    # Train model and save data(code(Z), output and total loss data)

    model_index = 0

    total_summary_loss_data = [
        'model_type', 'z_size', 'train_loss', 'test_loss'
    ]

    for z_size in z_list:

        # Define models

        INPUT_SIZE = 784
        HIDDEN_SIZE = z_size

        if type == "digit":
            w_initializer = initializers.truncated_normal(mean=0.0,
                                                          stddev=0.05,
                                                          seed=None)
            b_initializer = initializers.zeros()

            dense1 = Input(shape=(INPUT_SIZE, ))
            dense2 = Dense(HIDDEN_SIZE,
                           activation='linear',
                           kernel_initializer=w_initializer,
                           bias_initializer=b_initializer)(dense1)
            dense3 = Dense(INPUT_SIZE,
                           activation='sigmoid',
                           kernel_initializer=w_initializer,
                           bias_initializer=b_initializer)(dense2)

            autoencoder[model_index] = Model(dense1, dense3)

            adam = optimizers.Adam(lr=0.001)
            autoencoder[model_index].compile(loss='mean_squared_error',
                                             optimizer=adam)

            autoencoder[model_index].fit(X_train,
                                         X_train,
                                         epochs=E,
                                         batch_size=BS,
                                         verbose=0)

        else:
            w_initializer = initializers.glorot_uniform(seed=None)
            b_initializer = initializers.glorot_uniform(seed=None)

            dense1 = Input(shape=(INPUT_SIZE, ))
            dense2 = Dense(HIDDEN_SIZE,
                           activation='linear',
                           kernel_initializer=w_initializer,
                           bias_initializer=b_initializer)(dense1)
            dense3 = Dense(INPUT_SIZE,
                           activation='sigmoid',
                           kernel_initializer=w_initializer,
                           bias_initializer=b_initializer)(dense2)

            autoencoder[model_index] = Model(dense1, dense3)

            adagrad = optimizers.Adagrad(lr=0.01)
            autoencoder[model_index].compile(loss='mean_squared_error',
                                             optimizer=adagrad)

            autoencoder[model_index].fit(X_train,
                                         X_train,
                                         epochs=E,
                                         batch_size=BS,
                                         verbose=0)

        # Get output and calculate loss

        get_output = K.function([autoencoder[model_index].layers[0].input],
                                [autoencoder[model_index].layers[2].output])
        train_output = get_output([X_train])[0]
        test_output = get_output([X_test])[0]

        train_loss = np.sum((X_train - train_output)**
                            2) / (X_train.shape[0] * X_train.shape[1])
        test_loss = np.sum(
            (X_test - test_output)**2) / (X_test.shape[0] * X_test.shape[1])

        summary_loss_data = ['BAE', z_size, train_loss, test_loss]

        total_summary_loss_data = np.vstack(
            (total_summary_loss_data, summary_loss_data))

        np.savetxt("total_loss.csv",
                   total_summary_loss_data,
                   delimiter=',',
                   fmt='%s')

        np.savetxt("test_out.csv", test_output, delimiter=',')

        # Get code(Z)
        get_z = K.function([autoencoder[model_index].layers[0].input],
                           [autoencoder[model_index].layers[1].output])
        test_z = get_z([X_test])[0]

        np.savetxt("test_code.csv", test_z, delimiter=',')

        model_index = model_index + 1

    # Print total loss
    print(total_summary_loss_data)
    print("learning basic autoencoder model finish! \n")
    def build(self, input_shape):

        input_dim = input_shape[-1]

        self.W_K = self.add_weight(name='W_K',
                                   shape=(self.Nh, self.hidden_dim // self.Nh,
                                          input_dim),
                                   initializer='he_normal',
                                   trainable=True,
                                   regularizer=regularizers.l2(1e-4))

        if self.m_for_stem is None:
            self.W_V = self.add_weight(name='W_V',
                                       shape=(self.Nh,
                                              self.hidden_dim // self.Nh,
                                              input_dim),
                                       initializer='he_normal',
                                       trainable=True,
                                       regularizer=regularizers.l2(1e-4))
        else:
            self.W_V = self.add_weight(name='W_V',
                                       shape=(self.Nh,
                                              self.hidden_dim // self.Nh,
                                              input_dim, self.m_for_stem),
                                       initializer='he_normal',
                                       trainable=True,
                                       regularizer=regularizers.l2(1e-4))

        self.W_Q = self.add_weight(name="W_Q",
                                   shape=(self.Nh, self.hidden_dim // self.Nh,
                                          input_dim),
                                   initializer='he_normal',
                                   trainable=True,
                                   regularizer=regularizers.l2(1e-4))

        self.Rel_W = self.add_weight(
            name="Rel_W",
            shape=(self.Nh, 1, self.k_size, (self.hidden_dim // 2) // self.Nh),
            initializer=initializers.truncated_normal(),
            trainable=True,
            regularizer=regularizers.l2(1e-4))

        self.Rel_H = self.add_weight(
            name="Rel_H",
            shape=(self.Nh, self.k_size, 1, (self.hidden_dim // 2) // self.Nh),
            initializer=initializers.truncated_normal(),
            trainable=True,
            regularizer=regularizers.l2(1e-4))

        if self.m_for_stem is not None:
            self.emb_a = self.add_weight(name="emb_a",
                                         shape=(self.k_size, 1,
                                                self.hidden_dim // self.Nh),
                                         initializer='he_normal',
                                         trainable=True,
                                         regularizer=regularizers.l2(1e-4))
            self.emb_b = self.add_weight(name="emb_b",
                                         shape=(1, self.k_size,
                                                self.hidden_dim // self.Nh),
                                         initializer='he_normal',
                                         trainable=True,
                                         regularizer=regularizers.l2(1e-4))

            self.emb_mix = self.add_weight(name="emb_mix",
                                           shape=(self.m_for_stem,
                                                  self.hidden_dim // self.Nh),
                                           initializer='he_normal',
                                           trainable=True,
                                           regularizer=regularizers.l2(1e-4))

        super(SelfAttention, self).build(input_shape)
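A shape sketch for the relative-position tables above (sizes illustrative): Rel_W spans the k_size columns and Rel_H the k_size rows, each holding half of the per-head dimension, and broadcasting their sum yields a full k_size x k_size grid:

import numpy as np

Nh, hidden_dim, k_size = 4, 64, 7
rel_w = np.zeros((Nh, 1, k_size, (hidden_dim // 2) // Nh))
rel_h = np.zeros((Nh, k_size, 1, (hidden_dim // 2) // Nh))
grid = rel_w + rel_h
assert grid.shape == (Nh, k_size, k_size, (hidden_dim // 2) // Nh)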
Example #21
# make sure the input data shape:
train_data = np.reshape(train_data, [-1, 48, 48, 1])
test_data = np.reshape(test_data, [-1, 48, 48, 1])

############################################################
# Model
############################################################
# make around training parameters:
conv_layers = [[64, 64, 0, 128, 128, 0, 256, 256, 0, 512, 512, 0]]
kernel_size = [[3, 3, 0, 3, 3, 0, 3, 3, 0, 3, 3, 0]]
dense_layers = [[1024, 512]]
dp_layers = [0.5]
activa_fn = [['leaky_relu', 0.02], ['relu', 'relu']]
learn_rate = [0.001]
b_init = [['Constant', '0.01', Constant(0.01)]]
w_init = [['he_normal', 'he_normal', 'he_normal'], ['truncate_normal', 'M:0/S:0.02', truncated_normal(0, 0.02)]]
epochs = 100
validate_rate = 0.2
bt_size = [256]

datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# N-fold Validation:
n_splits = round(1/validate_rate)
nflist = utils.N_Fold_Validate(n_splits, train_data.shape[0])
Example #22
def Conv(x, f_dim):
    return KL.Conv2D(filters=f_dim,
                     kernel_size=(5, 5),
                     strides=(2, 2),
                     padding='same',
                     kernel_initializer=KI.truncated_normal(stddev=0.02))(x)
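Usage note (shapes illustrative): each call halves the spatial resolution via the (2, 2) stride, the usual DCGAN-style downsampling step.

import keras.layers as KL

x = KL.Input(shape=(64, 64, 3))
y = Conv(x, 32)  # -> shape (None, 32, 32, 32)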
Example #23
 pytest.param(
     initializers.RandomUniform(maxval=0.1),
     dict(class_name="random_uniform", minval=-0.05, maxval=0.1, seed=None),
     id="ru_0",
 ),
 pytest.param(
     initializers.random_uniform(minval=-0.2, seed=42),
     dict(class_name="random_uniform", minval=-0.2, maxval=0.05, seed=42),
     id="ru_1",
 ),
 pytest.param(
     initializers.TruncatedNormal(0.1),
     dict(class_name="truncated_normal", mean=0.1, stddev=0.05, seed=None),
     id="tn_0",
 ),
 pytest.param(
     initializers.truncated_normal(mean=0.2, stddev=0.003, seed=42),
     dict(class_name="truncated_normal", mean=0.2, stddev=0.003, seed=42),
     id="tn_1",
 ),
 pytest.param(
     initializers.Orthogonal(1.1),
     dict(class_name="orthogonal", gain=1.1, seed=None),
     id="o_0",
 ),
 pytest.param(
     initializers.orthogonal(gain=1.2, seed=42),
     dict(class_name="orthogonal", gain=1.2, seed=42),
     id="o_1",
 ),
 pytest.param(initializers.Identity(1.1), dict(class_name="identity", gain=1.1), id="i_0"),
 pytest.param(initializers.identity(), dict(class_name="identity", gain=1.0), id="i_1"),
Example #24
def train(trainjson, epoch, batch_size, netq, neta, netfull, activate,
          drop_out, modelname, reg_flag, normal_flag, optim):
    data = loadfromjson(trainjson)
    # get label
    taglist = []
    for index, item in enumerate(data['datalist']):
        if item[0] == '0':
            taglist.append(0)
        else:
            if item[0] == '1':
                taglist.append(1)
            else:
                print('ERROR\n')
                print(index)
                taglist.append(0)
    # get answer vectors and question vectors
    xq = np.zeros((len(data['vectorlist1']), netq[0], 60), dtype='float32')
    xa = np.zeros((len(data['vectorlist1']), neta[0], 60), dtype='float32')
    for index1, items in enumerate(data['vectorlist1']):
        for index2, item2 in enumerate(items):
            if index2 == netq[0]:
                break
            xq[index1][index2] = item2
    for index1, items in enumerate(data['vectorlist2']):
        for index2, item2 in enumerate(items):
            if index2 == neta[0]:
                break
            xa[index1][index2] = item2
    ya = np.array(taglist)
    trueya = []
    truexa = []
    truexq = []
    for index, label in enumerate(taglist):
        if label == 1:
            trueya.append(label)
            truexa.append(xa[index])
            truexq.append(xq[index])
    assert (len(trueya) != 0)
    print(len(truexq))
    truexa = np.repeat(truexa, 2, axis=0)
    truexq = np.repeat(truexq, 2, axis=0)
    trueya = np.repeat(trueya, 2, axis=0)
    ya = np.concatenate((ya, np.array(trueya)))
    # print(xa.shape,truexa.shape)
    xa = np.concatenate((xa, np.array(truexa)))
    xq = np.concatenate((xq, np.array(truexq)))

    print('Build model...')
    # regularizer param
    if reg_flag == 'None':
        reg = None
    else:
        reg_rate = float(reg_flag.split('_')[1])
        if reg_flag.split('_')[0] == 'l1':
            reg = l1(reg_rate)
        else:
            reg = l2(reg_rate)

    if not os.path.isfile(modelname):
        # separate LSTMs for question and answer
        question_vector_input = Input(shape=(netq[0], 60),
                                      dtype="float32",
                                      name='question_vector_input')
        question_vector_mask = Masking(mask_value=0.0)(question_vector_input)
        question_features = LSTM(
            units=netq[1],
            kernel_initializer=initializers.truncated_normal(
                stddev=0.01))(question_vector_mask)
        answer_vector_input = Input(shape=(neta[0], 60),
                                    dtype="float32",
                                    name='answer_vector_input')
        answer_vector_mask = Masking(mask_value=0.0)(answer_vector_input)
        answer_features = LSTM(
            units=neta[1],
            kernel_initializer=initializers.truncated_normal(
                stddev=0.01))(answer_vector_mask)
        # merge two LSTMs
        question_features = normalization.BatchNormalization()(
            question_features)
        answer_features = normalization.BatchNormalization()(answer_features)
        features = concatenate([answer_features, question_features])
        # features = Activation(activate)(features)
        # full connected layer
        for dim in netfull:
            features = Dense(dim,
                             kernel_regularizer=reg,
                             use_bias=True,
                             kernel_initializer=initializers.truncated_normal(
                                 stddev=0.01))(features)
            # using normalization or not
            if (normal_flag == 'true'):
                features = normalization.BatchNormalization()(features)
            features = Activation(activate)(features)
        # drop out layer
        final_layer = Dropout(drop_out)(features)
        # sigmoid to 0-1
        main_output = Dense(1, activation='sigmoid',
                            name='main_output')(final_layer)
        # finish model
        model = Model(inputs=[question_vector_input, answer_vector_input],
                      outputs=[main_output])

        opt = optim.split('_')[0]

        if opt == 'rmsprop':
            opt = RMSprop(float(optim.split('_')[1]))
        else:
            opt = Adam(float(optim.split('_')[1]))

        model.compile(optimizer=opt,
                      loss='binary_crossentropy',
                      metrics=['accuracy'])
    else:
        print('load previous model')
        model = load_model(modelname)
    best_score = 0
    best_epoch = 0
    if not os.path.isfile('../json_data/quicktest.json'):
        savetojson('../raw_data/dev.txt', '../json_data/quicktest.json', 8000)
    test_q, test_a = getvalid(netq[0], neta[0], '../json_data/quicktest.json')
    quelist, answerlist, datalist = getdata('../raw_data/dev.txt', 8000)

    for i in range(epoch):
        model.fit([xq, xa], [ya], batch_size=batch_size,
                  epochs=1)  # training takes several hours
        cur_score = valid(model, test_q, test_a, quelist, answerlist)
        print('In epoch', i + 1, 'MRR', cur_score)
        if cur_score > best_score:
            best_score = cur_score
            best_epoch = i + 1
        model.save(modelname[:-3] + str(epoch) + '.h5')
    print('best epoch', best_epoch, 'best MRR', best_score)
    f = open('../result/' + modelname[9:-3] + '.txt', 'w')
    f.write('best epoch' + str(best_epoch) + 'best MRR' + str(best_score))
    f.close()
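A hypothetical invocation (every argument value below is illustrative, not from the source):

train('../json_data/train.json', epoch=10, batch_size=64, netq=[40, 128],
      neta=[40, 128], netfull=[64, 32], activate='relu', drop_out=0.5,
      modelname='../model/qa_lstm.h5', reg_flag='l2_0.001',
      normal_flag='true', optim='adam_0.001')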