def build_model(self):
    real_value_input = Input(shape=(self.field_dim[0], ))
    discrete_input = Input(shape=(self.field_dim[1], ))
    embeddings = Embedding(
        self.feature_dim + 1,
        self.embedding_size,
        embeddings_initializer=truncated_normal(stddev=self.init_std),
        embeddings_regularizer=l2(self.reg),
        mask_zero=True,
        trainable=True)(discrete_input)
    reshape = _Reshape(target_shape=(-1, ))(embeddings)
    features = Concatenate(axis=1)([real_value_input, reshape])

    # deep part
    dense_network_out = features
    for each in self.hidden_size:
        dense_network_out = Dense(
            each,
            activation='relu',
            kernel_initializer=truncated_normal(stddev=self.init_std),
            kernel_regularizer=l2(self.reg))(dense_network_out)

    # cross part
    cross_network_out = CrossLayer(self.input_dim, self.cross_layer_num,
                                   self.reg)(features)

    # concat dim: self.hidden_size[-1] + self.field_dim[0]
    #             + self.field_dim[1] * self.embedding_size
    concat = Concatenate(
        axis=1, name='concat')([dense_network_out, cross_network_out])
    output = Dense(
        1,
        activation='sigmoid',
        kernel_initializer=truncated_normal(stddev=self.init_std),
        kernel_regularizer=l2(self.reg))(concat)
    return Model([real_value_input, discrete_input], [output])
def build_model(self):
    inputs = Input((self.field_dim, ))
    embeddings = Embedding(
        self.feature_dim + 1,
        self.embedding_size,
        embeddings_initializer=truncated_normal(stddev=self.init_std),
        embeddings_regularizer=l2(self.reg),
        mask_zero=False,
        trainable=True)(inputs)
    z = ZLayer(self.output_dim, self.reg)(embeddings)
    if self.mode == 'outer':
        p = OuterProductLayer(self.output_dim, self.reg)(embeddings)
    else:
        # only the outer-product mode is implemented here
        raise ValueError('unsupported mode: %s' % self.mode)
    features = Concatenate(axis=1)([z, p])
    # LeakyReLU with alpha=1.0 acts as the identity
    outputs = LeakyReLU(1.0)(features)
    for i in range(len(self.fully_list)):
        if i < len(self.fully_list) - 1:
            outputs = Dropout(self.keep_prob)(Dense(
                self.fully_list[i],
                activation='relu',
                kernel_initializer=truncated_normal(stddev=self.init_std),
                kernel_regularizer=l2(self.reg))(outputs))
        else:
            outputs = Dense(
                1,
                activation='sigmoid',
                kernel_initializer=truncated_normal(stddev=self.init_std),
                kernel_regularizer=l2(self.reg))(outputs)
    return Model([inputs], outputs)
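# The build_model above only wires the 'outer' product path. A hedged
# sketch of an inner-product counterpart for mode == 'inner' (a minimal
# layer following the PNN formulation; name and shapes are assumptions,
# not the source's implementation):
from keras import backend as K
from keras.layers import Layer

class InnerProductLayer(Layer):
    """Pairwise inner products between field embeddings."""

    def call(self, inputs, **kwargs):
        # inputs: (batch, fields, embed) -> p: (batch, fields, fields)
        p = K.batch_dot(inputs, K.permute_dimensions(inputs, (0, 2, 1)))
        return K.batch_flatten(p)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[1] * input_shape[1])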
def build(self, input_shape):
    assert len(input_shape) == 2
    self.W_query = self.add_weight(
        shape=(input_shape[0][-1],
               self.num_attention_heads * self.size_per_head),
        name='Wq',
        initializer=initializers.truncated_normal(
            stddev=self.initializer_range))
    self.bias_query = self.add_weight(
        shape=(self.num_attention_heads * self.size_per_head, ),
        name='bq',
        initializer=initializers.get('zeros'))
    self.W_key = self.add_weight(
        shape=(input_shape[1][-1],
               self.num_attention_heads * self.size_per_head),
        name='Wk',
        initializer=initializers.truncated_normal(
            stddev=self.initializer_range))
    self.bias_key = self.add_weight(
        shape=(self.num_attention_heads * self.size_per_head, ),
        name='bk',
        initializer=initializers.get('zeros'))
    self.W_value = self.add_weight(
        shape=(input_shape[1][-1],
               self.num_attention_heads * self.size_per_head),
        name='Wv',
        initializer=initializers.truncated_normal(
            stddev=self.initializer_range))
    self.bias_value = self.add_weight(
        shape=(self.num_attention_heads * self.size_per_head, ),
        name='bv',
        initializer=initializers.get('zeros'))
    super(MultiHeadAttentionLayer, self).build(input_shape)
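# A hedged sketch of the matching `call` for the weights built above:
# plain scaled dot-product attention, ignoring the per-head reshape and
# masking for brevity (an illustration, not the source's code):
def call(self, inputs, **kwargs):
    q_in, kv_in = inputs
    query = K.bias_add(K.dot(q_in, self.W_query), self.bias_query)
    key = K.bias_add(K.dot(kv_in, self.W_key), self.bias_key)
    value = K.bias_add(K.dot(kv_in, self.W_value), self.bias_value)
    # attention scores scaled by sqrt(d_k): (batch, T_q, T_k)
    scores = K.batch_dot(query, key, axes=[2, 2])
    scores = scores / K.sqrt(K.cast(self.size_per_head, K.floatx()))
    weights = K.softmax(scores)
    return K.batch_dot(weights, value)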
def infer(X, trainable=True,
          init=initializers.truncated_normal(stddev=0.01)):
    init_w = init
    # scale raw pixel values to [0, 1]
    normed = Lambda(lambda x: x / 255.,
                    output_shape=K.int_shape(X)[1:])(X)
    h_conv1 = Convolution2D(32, (8, 8), strides=(4, 4),
                            kernel_initializer=init_w, use_bias=False,
                            padding='same')(normed)
    h_ln1 = LayerNormalization(activation=K.relu)(h_conv1)
    h_conv2 = Convolution2D(64, (4, 4), strides=(2, 2),
                            kernel_initializer=init_w, use_bias=False,
                            padding='same')(h_ln1)
    h_ln2 = LayerNormalization(activation=K.relu)(h_conv2)
    h_conv3 = Convolution2D(64, (3, 3), strides=(1, 1),
                            kernel_initializer=init_w, use_bias=False,
                            padding='same')(h_ln2)
    h_ln3 = LayerNormalization(activation=K.relu)(h_conv3)
    h_flat = Flatten()(h_ln3)

    # dueling heads: advantage A(s, a) and state value V(s)
    fc_advantage = Dense(512, use_bias=False,
                         kernel_initializer=init_w)(h_flat)
    h_ln_fc_advantage = LayerNormalization(activation=K.relu)(fc_advantage)
    advantage = Dense(NUM_ACTIONS, kernel_initializer=init_w,
                      use_bias=False)(h_ln_fc_advantage)
    fc_value = Dense(512, use_bias=False,
                     kernel_initializer=init_w)(h_flat)
    h_ln_fc_value = LayerNormalization(activation=K.relu)(fc_value)
    value = Dense(1, kernel_initializer=init_w,
                  use_bias=False)(h_ln_fc_value)

    # Q(s, a) = V(s) + A(s, a) - mean_a' A(s, a')
    z = Lambda(lambda x: x[1] + x[0] - K.mean(x[0], axis=1, keepdims=True),
               output_shape=(NUM_ACTIONS, ))([advantage, value])
    model = Model(inputs=X, outputs=z)
    model.trainable = trainable
    return z, model
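# The final Lambda above implements the dueling aggregation
#     Q(s, a) = V(s) + A(s, a) - mean_a' A(s, a').
# A tiny numeric check of that identity (illustrative values only):
import numpy as np
A = np.array([[1.0, 3.0, 2.0]])  # advantages for three actions
V = np.array([[0.5]])            # state value
Q = V + A - A.mean(axis=1, keepdims=True)
assert np.allclose(Q, [[-0.5, 1.5, 0.5]])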
def _build_model(self, pretrain_model):
    input_ids = Input(shape=(self.seq_length, ))
    input_mask = Input(shape=(self.seq_length, ))
    inputs = [input_ids, input_mask]
    if self.use_token_type:
        input_token_type_ids = Input(shape=(self.seq_length, ))
        inputs.append(input_token_type_ids)
    self.bert = BertModel(
        self.config,
        batch_size=self.batch_size,
        seq_length=self.seq_length,
        max_predictions_per_seq=self.max_predictions_per_seq,
        use_token_type=self.use_token_type,
        mask=self.mask)
    self.bert_encoder = self.bert.get_bert_encoder()
    self.bert_encoder.load_weights(pretrain_model)
    pooled_output = self.bert_encoder(inputs)
    pooled_output = Dropout(self.config.hidden_dropout_prob)(pooled_output)
    pred = Dense(units=self.num_classes,
                 activation='softmax',
                 kernel_initializer=initializers.truncated_normal(
                     stddev=self.config.initializer_range))(pooled_output)
    model = Model(inputs=inputs, outputs=pred)
    return model
def get_classifer_model(self, num_classes):
    """Construct the classification model."""
    bert_encoder = Dropout(
        self.config.hidden_dropout_prob)(self.pooled_output)
    pred = Dense(units=num_classes,
                 activation='softmax',
                 kernel_initializer=initializers.truncated_normal(
                     stddev=self.config.initializer_range))(bert_encoder)
    self.classifer_model = Model(inputs=self.inputs, outputs=pred)
    return self.classifer_model
def get_next_sentence_model(self):
    """Construct the next-sentence model for pretraining."""
    pooled_output = self.bert_model(self.inputs)
    pred = Dense(units=2,
                 activation='softmax',
                 kernel_initializer=initializers.truncated_normal(
                     stddev=self.config.initializer_range))(pooled_output)
    self.next_sentence_model = Model(inputs=self.inputs,
                                     outputs=pred,
                                     name='next_sentence_model')
    return self.next_sentence_model
def build(self, input_shape):
    if self.use_token_type:
        _, seq_length, input_width = input_shape[0]
        self.token_type_table = self.add_weight(
            shape=(self.token_type_vocab_size, input_width),
            initializer=initializers.truncated_normal(
                stddev=self.initializer_range),
            name='token_type_embeddings')
    else:
        _, seq_length, input_width = input_shape
    if self.use_position_embeddings:
        assert seq_length <= self.max_position_embeddings
        self.full_position_embeddings = self.add_weight(
            shape=(self.max_position_embeddings, input_width),
            initializer=initializers.truncated_normal(
                stddev=self.initializer_range),
            name='position_embeddings')
    super(Embedding_Postprocessor, self).build(input_shape)
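# A hedged sketch of the matching `call`, mirroring the original BERT
# post-processing (add token-type embeddings via a one-hot matmul, then
# add position embeddings); an assumption, not the source's code:
def call(self, inputs, **kwargs):
    if self.use_token_type:
        output, token_type_ids = inputs
        one_hot_ids = K.one_hot(K.cast(token_type_ids, 'int32'),
                                self.token_type_vocab_size)
        output = output + K.dot(one_hot_ids, self.token_type_table)
    else:
        output = inputs
    if self.use_position_embeddings:
        seq_length = K.int_shape(output)[1]
        # broadcast (seq_length, width) over the batch dimension
        output = output + self.full_position_embeddings[:seq_length]
    return output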
def network(categorical_columns_item, num_deep_numeric_feature,
            num_wide_numeric_feature, bias):
    input_layers = list()
    embedding_layers = list()

    # categorical deep features
    for col, num in categorical_columns_item.items():
        input_deep_cat_layer = Input(shape=(1, ),
                                     name=col + "_categorical_deep_input")
        embedding_layer = Embedding(
            input_dim=num,
            output_dim=min(10, num // 2),
            embeddings_initializer=truncated_normal(mean=0,
                                                    stddev=1 / np.sqrt(num)),
            input_length=1,
            name=col + "_deep_embedding")(input_deep_cat_layer)
        embedding_layer = Reshape(target_shape=(min(10, num // 2), ),
                                  name=col + "_deep_reshape")(embedding_layer)
        embedding_layer = Dropout(rate=0.15,
                                  noise_shape=(None, 1),
                                  name=col + "_deep_dropout")(embedding_layer)
        input_layers.append(input_deep_cat_layer)
        embedding_layers.append(embedding_layer)

    # numeric deep features
    input_deep_num_layer = Input(shape=(num_deep_numeric_feature, ),
                                 name="numeric_deep_input")
    input_layers.append(input_deep_num_layer)

    # numeric wide features
    input_wide_num_layer = Input(shape=(num_wide_numeric_feature, ),
                                 name="numeric_wide_input")
    input_layers.append(input_wide_num_layer)

    hidden_layer = Dense(units=32,
                         kernel_initializer=lecun_normal(),
                         activation="selu")(Concatenate()([
                             Concatenate()(embedding_layers),
                             Dropout(rate=0.15)(input_deep_num_layer)
                         ]))
    hidden_layer = Dense(units=16,
                         kernel_initializer=lecun_normal(),
                         activation="selu")(hidden_layer)
    hidden_layer = Dense(units=8,
                         kernel_initializer=lecun_normal(),
                         activation="selu")(hidden_layer)
    hidden_layer = Concatenate()([hidden_layer, input_wide_num_layer])
    output_layer = Dense(units=1,
                         kernel_initializer=lecun_normal(),
                         bias_initializer=constant(logit(bias)),
                         activation="sigmoid",
                         name="output_layer")(hidden_layer)
    return Model(input_layers, output_layer)
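# The output bias above is seeded with logit(bias) so the untrained
# sigmoid output starts at the base rate of the target. A minimal check
# of that transform (assuming `logit` here is scipy.special.logit):
import numpy as np
from scipy.special import expit, logit
base_rate = 0.25  # illustrative value
assert np.isclose(expit(logit(base_rate)), base_rate)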
def get_lm_model(self):
    """Construct the masked language model for pretraining."""
    config = self.config
    positions_input = Input(shape=(self.max_predictions_per_seq, ),
                            dtype='int32',
                            name='masked_lm_positions')
    cur_inputs = self.inputs + [positions_input]
    sequence_output = Lambda(
        function=lambda x: gather_indexes(x[0], x[1]),
        output_shape=lambda x: (x[0][0], x[1][1], x[0][2]))(
            [self.sequence_output, positions_input])
    sequence_output = Dense(
        units=config.hidden_size,
        activation=get_activation(config.hidden_act),
        kernel_initializer=initializers.truncated_normal(
            stddev=config.initializer_range))(sequence_output)
    sequence_output = BatchNormalization(
        name='layer_norm_lm')(sequence_output)
    # project back onto the (transposed) embedding table
    sequence_att = Lambda(
        function=lambda x: K.dot(
            x[0], K.permute_dimensions(x[1], pattern=(1, 0))),
        output_shape=lambda x: (x[0][0], x[0][1], x[1][0]))(
            [sequence_output, self.embedding_table])

    class AddBiasSoftmax(Layer):
        def __init__(self, **kwargs):
            self.supports_masking = True
            super(AddBiasSoftmax, self).__init__(**kwargs)

        def build(self, input_shape):
            self.bias = self.add_weight(
                shape=(input_shape[-1], ),
                name='output_bias',
                initializer=initializers.get('zeros'))
            super(AddBiasSoftmax, self).build(input_shape)

        def call(self, inputs, **kwargs):
            output = K.bias_add(inputs, self.bias)
            output = K.softmax(output, axis=-1)
            return output

        def compute_output_shape(self, input_shape):
            return input_shape

    sequence_softmax = AddBiasSoftmax()(sequence_att)
    self.lm_model = Model(inputs=cur_inputs,
                          outputs=sequence_softmax,
                          name='lm_model')
    return self.lm_model
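# `gather_indexes` is not defined in this snippet. A hedged sketch that
# matches the helper in the original BERT code (flatten the batch, then
# gather the hidden vectors at the masked positions); an assumption:
def gather_indexes(sequence_tensor, positions):
    # sequence_tensor: (batch, seq_len, width); positions: (batch, preds)
    batch_size = K.shape(sequence_tensor)[0]
    seq_length = K.shape(sequence_tensor)[1]
    width = K.shape(sequence_tensor)[2]
    flat_offsets = K.reshape(K.arange(0, batch_size) * seq_length, (-1, 1))
    flat_positions = K.reshape(K.cast(positions, 'int32') + flat_offsets,
                               (-1, ))
    flat_sequence = K.reshape(sequence_tensor, (-1, width))
    output = K.gather(flat_sequence, flat_positions)
    return K.reshape(output, (batch_size, -1, width))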
def get_classifer_model(self, ):
    bert_encoder = Dropout(self.config.hidden_dropout_prob)(
        self.pooled_output)
    pred = Dense(
        units=2,
        activation='softmax',
        kernel_initializer=initializers.truncated_normal(
            stddev=self.config.initializer_range))(bert_encoder)
    self.classifer_model = Model(inputs=self.inputs, outputs=pred)
    return self.classifer_model
def example_network(input_shape):
    im_input = Input(shape=input_shape)
    t = Conv3D(64, (11, 11, 11),
               padding='valid',
               kernel_initializer=initializers.truncated_normal(
                   mean=0, stddev=0.001),
               bias_initializer=initializers.constant(0.1))(im_input)
    t = Activation('relu')(t)
    t = MaxPool3D(pool_size=(2, 2, 2), padding='valid')(t)
    t = Conv3D(128, (6, 6, 6),
               padding='valid',
               kernel_initializer=initializers.truncated_normal(
                   mean=0, stddev=0.001),
               bias_initializer=initializers.constant(0.1))(t)
    t = Activation('relu')(t)
    t = MaxPool3D(pool_size=(2, 2, 2), padding='valid')(t)
    t = Conv3D(256, (3, 3, 3),
               padding='valid',
               kernel_initializer=initializers.truncated_normal(
                   mean=0, stddev=0.001),
               bias_initializer=initializers.constant(0.1))(t)
    t = Activation('relu')(t)
    t = Flatten()(t)
    t = Dense(1000,
              kernel_initializer=initializers.truncated_normal(
                  mean=0, stddev=1 / np.sqrt(1000)),
              bias_initializer=initializers.constant(1.0))(t)
    t = Activation('relu')(t)
    t = Dropout(0.5)(t)
    t = Dense(500,
              kernel_initializer=initializers.truncated_normal(
                  mean=0, stddev=1 / np.sqrt(500)),
              bias_initializer=initializers.constant(1.0))(t)
    t = Activation('relu')(t)
    t = Dropout(0.5)(t)
    t = Dense(200,
              kernel_initializer=initializers.truncated_normal(
                  mean=0, stddev=1 / np.sqrt(200)),
              bias_initializer=initializers.constant(1.0))(t)
    t = Activation('relu')(t)
    t = Dropout(0.5)(t)
    t = Dense(1)(t)
    output = Activation('sigmoid')(t)
    model = Model(inputs=im_input, outputs=output)
    return model
def build(self, input_shape):
    self.input_dim = input_shape[1]
    self.W = []
    self.bias = []
    for i in range(self.num_layer):
        self.W.append(
            self.add_weight(shape=[1, self.input_dim],
                            initializer=truncated_normal(stddev=0.01),
                            regularizer=l2(self.reg),
                            name='w_' + str(i)))
        self.bias.append(
            self.add_weight(shape=[1, self.input_dim],
                            initializer='zeros',
                            name='b_' + str(i)))
    self.built = True
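# A hedged sketch of the matching `call`, following the Deep & Cross
# recurrence x_{l+1} = x_0 * (x_l . w_l) + b_l + x_l (an illustration of
# how the weights built above would be used, not the source's code):
def call(self, inputs, **kwargs):
    x0 = inputs  # (batch, input_dim)
    xl = inputs
    for i in range(self.num_layer):
        # scalar interaction per sample: (batch, 1)
        xw = K.sum(xl * self.W[i], axis=1, keepdims=True)
        xl = x0 * xw + self.bias[i] + xl
    return xl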
def generator_model(im_size, output_channel=3):
    initializer = initializers.truncated_normal(stddev=0.1)
    model = Sequential()
    model.add(
        Dense(input_dim=100, units=512 * 4 * 4,
              kernel_initializer=initializer))
    model.add(Activation('linear'))
    model.add(Reshape((4, 4, 512)))
    model.add(
        Conv2DTranspose(256, (5, 5), strides=(2, 2), padding='same',
                        kernel_initializer=initializer))
    # model.add(BatchNormalization())
    model.add(Activation('tanh'))
    model.add(
        Conv2DTranspose(128, (5, 5), strides=(2, 2), padding='same',
                        kernel_initializer=initializer))
    # model.add(BatchNormalization())
    model.add(Activation('tanh'))
    model.add(
        Conv2DTranspose(64, (5, 5), strides=(2, 2), padding='same',
                        kernel_initializer=initializer))
    # model.add(BatchNormalization())
    model.add(Activation('tanh'))
    model.add(
        Conv2DTranspose(output_channel, (5, 5), strides=(2, 2),
                        padding='same', kernel_initializer=initializer))
    model.add(Activation('tanh'))
    return model
def discriminator_model(im_size, input_channel=3):
    initializer = initializers.truncated_normal(stddev=0.1)
    model = Sequential()
    model.add(
        Convolution2D(32, (5, 5), padding='same',
                      input_shape=(im_size, im_size, input_channel),
                      strides=(2, 2), kernel_initializer=initializer))
    model.add(LeakyReLU(0.2))
    model.add(
        Convolution2D(64, (5, 5), padding='same', strides=(2, 2),
                      kernel_initializer=initializer))
    # model.add(BatchNormalization())
    model.add(LeakyReLU(0.2))
    model.add(
        Convolution2D(128, (5, 5), padding='same', strides=(2, 2),
                      kernel_initializer=initializer))
    # model.add(BatchNormalization())
    model.add(LeakyReLU(0.2))
    # model.add(Convolution2D(512, (5, 5), padding='same', strides=(2, 2),
    #                         kernel_initializer=initializer))
    # model.add(BatchNormalization())
    # model.add(LeakyReLU(0.2))
    model.add(Flatten())
    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    return model
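# Hedged sketch of wiring the two models into a DCGAN training pair.
# The optimizer and loss choices are assumptions, not from the source;
# im_size=64 matches the generator's four stride-2 upsampling steps.
g = generator_model(64)
d = discriminator_model(64)
d.compile(loss='binary_crossentropy', optimizer='adam')
d.trainable = False  # freeze D while training G through the stack
gan = Sequential([g, d])
gan.compile(loss='binary_crossentropy', optimizer='adam')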
def create_model(window, input_shape, num_actions, init_method,
                 model_name='q_network'):  # noqa: D103
    input_rows, input_cols = input_shape[0], input_shape[1]
    print('Now we start building the model ... ')
    model = Sequential()
    if init_method == 'he':
        model.add(
            Conv2D(16, kernel_size=(8, 8), strides=(4, 4), padding='same',
                   kernel_initializer=initializers.he_normal(),
                   activation='relu',
                   input_shape=(window, input_rows, input_cols)))
        model.add(
            Conv2D(32, kernel_size=(4, 4), strides=(2, 2), padding='same',
                   kernel_initializer=initializers.he_normal(),
                   activation='relu'))
        model.add(Flatten())
        model.add(
            Dense(256, activation='relu',
                  kernel_initializer=initializers.he_normal()))
        model.add(Dense(num_actions, activation='linear'))
    elif init_method == 'default':
        model.add(
            Conv2D(16, kernel_size=(8, 8), strides=(4, 4), padding='same',
                   activation='relu',
                   input_shape=(window, input_rows, input_cols)))
        model.add(
            Conv2D(32, kernel_size=(4, 4), strides=(2, 2), padding='same',
                   activation='relu'))
        model.add(Flatten())
        model.add(Dense(256, activation='relu'))
        model.add(Dense(num_actions, activation='linear'))
    elif init_method == 'normal':
        model.add(
            Conv2D(16, kernel_size=(8, 8), strides=(4, 4), padding='same',
                   kernel_initializer=initializers.truncated_normal(
                       stddev=0.01),
                   activation='relu',
                   input_shape=(window, input_rows, input_cols)))
        model.add(
            Conv2D(32, kernel_size=(4, 4), strides=(2, 2), padding='same',
                   activation='relu',
                   kernel_initializer=initializers.truncated_normal(
                       stddev=0.01)))
        model.add(Flatten())
        model.add(
            Dense(256,
                  kernel_initializer=initializers.random_normal(stddev=0.01),
                  activation='relu'))
        model.add(
            Dense(num_actions,
                  kernel_initializer=initializers.random_normal(stddev=0.01),
                  activation='linear'))
    return model
def call(self, inputs, **kwargs):
    # sum embeddings over fields, then form the outer product
    f_sigma = K.sum(inputs, axis=1, keepdims=True)
    p = K.batch_dot(tf.transpose(f_sigma, (0, 2, 1)), f_sigma)
    return Flatten()(Conv1D(
        self.output_dim, (self.embed_size, ),
        kernel_initializer=truncated_normal(stddev=0.01),
        kernel_regularizer=l2(self.reg))(p))
def call(self, inputs, **kwargs):
    return Flatten()(Conv1D(
        self.output_dim, (self.field_dim, ),
        kernel_initializer=truncated_normal(stddev=0.01),
        kernel_regularizer=l2(self.reg))(inputs))
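# Note: the two `call` methods above instantiate Conv1D/Flatten on every
# forward pass, so fresh kernels are created each call and may escape
# weight tracking. A hedged refactor sketch for the second layer
# (constructor arguments assumed from the code above):
class ZLayer(Layer):
    def __init__(self, output_dim, field_dim, reg, **kwargs):
        super(ZLayer, self).__init__(**kwargs)
        self.conv = Conv1D(output_dim, (field_dim, ),
                           kernel_initializer=truncated_normal(stddev=0.01),
                           kernel_regularizer=l2(reg))

    def call(self, inputs, **kwargs):
        return K.batch_flatten(self.conv(inputs))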
def basic(type, train, test, code, epoch, batch):
    # Load MNIST train and test data
    X_train = np.loadtxt(train, delimiter=',', dtype=None)
    X_test = np.loadtxt(test, delimiter=',', dtype=None)

    # z_list: experiment code (Z) sizes
    z_list = [code]
    autoencoder = [[] for i in range(len(z_list))]

    # E: epochs, BS: batch size
    E = epoch
    BS = batch

    # Train models and save data (code (Z), output and total loss)
    model_index = 0
    total_summary_loss_data = [
        'model_type', 'z_size', 'train_loss', 'test_loss'
    ]
    for z_size in z_list:
        # Define models
        INPUT_SIZE = 784
        HIDDEN_SIZE = z_size
        if type == "digit":
            w_initializer = initializers.truncated_normal(mean=0.0,
                                                          stddev=0.05,
                                                          seed=None)
            b_initializer = initializers.zeros()
            dense1 = Input(shape=(INPUT_SIZE, ))
            dense2 = Dense(HIDDEN_SIZE,
                           activation='linear',
                           kernel_initializer=w_initializer,
                           bias_initializer=b_initializer)(dense1)
            dense3 = Dense(INPUT_SIZE,
                           activation='sigmoid',
                           kernel_initializer=w_initializer,
                           bias_initializer=b_initializer)(dense2)
            autoencoder[model_index] = Model(dense1, dense3)
            adam = optimizers.Adam(lr=0.001)
            autoencoder[model_index].compile(loss='mean_squared_error',
                                             optimizer=adam)
            autoencoder[model_index].fit(X_train, X_train,
                                         epochs=E, batch_size=BS, verbose=0)
        else:
            w_initializer = initializers.glorot_uniform(seed=None)
            b_initializer = initializers.glorot_uniform(seed=None)
            dense1 = Input(shape=(INPUT_SIZE, ))
            dense2 = Dense(HIDDEN_SIZE,
                           activation='linear',
                           kernel_initializer=w_initializer,
                           bias_initializer=b_initializer)(dense1)
            dense3 = Dense(INPUT_SIZE,
                           activation='sigmoid',
                           kernel_initializer=w_initializer,
                           bias_initializer=b_initializer)(dense2)
            autoencoder[model_index] = Model(dense1, dense3)
            adagrad = optimizers.Adagrad(lr=0.01)
            autoencoder[model_index].compile(loss='mean_squared_error',
                                             optimizer=adagrad)
            autoencoder[model_index].fit(X_train, X_train,
                                         epochs=E, batch_size=BS, verbose=0)

        # Get outputs and calculate losses
        get_output = K.function([autoencoder[model_index].layers[0].input],
                                [autoencoder[model_index].layers[2].output])
        train_output = get_output([X_train])[0]
        test_output = get_output([X_test])[0]
        train_loss = np.sum((X_train - train_output)**2) / (
            X_train.shape[0] * X_train.shape[1])
        test_loss = np.sum((X_test - test_output)**2) / (
            X_test.shape[0] * X_test.shape[1])
        summary_loss_data = ['BAE', z_size, train_loss, test_loss]
        total_summary_loss_data = np.vstack(
            (total_summary_loss_data, summary_loss_data))
        np.savetxt("total_loss.csv", total_summary_loss_data,
                   delimiter=',', fmt='%s')
        np.savetxt("test_out.csv", test_output, delimiter=',')

        # Get code (Z)
        get_z = K.function([autoencoder[model_index].layers[0].input],
                           [autoencoder[model_index].layers[1].output])
        test_z = get_z([X_test])[0]
        np.savetxt("test_code.csv", test_z, delimiter=',')
        model_index = model_index + 1

    # Print total loss
    print(total_summary_loss_data)
    print("Finished training the basic autoencoder model!\n")
def build(self, input_shape):
    input_dim = input_shape[-1]
    self.W_K = self.add_weight(name='W_K',
                               shape=(self.Nh, self.hidden_dim // self.Nh,
                                      input_dim),
                               initializer='he_normal',
                               trainable=True,
                               regularizer=regularizers.l2(1e-4))
    if self.m_for_stem is None:
        self.W_V = self.add_weight(name='W_V',
                                   shape=(self.Nh,
                                          self.hidden_dim // self.Nh,
                                          input_dim),
                                   initializer='he_normal',
                                   trainable=True,
                                   regularizer=regularizers.l2(1e-4))
    else:
        self.W_V = self.add_weight(name='W_V',
                                   shape=(self.Nh,
                                          self.hidden_dim // self.Nh,
                                          input_dim, self.m_for_stem),
                                   initializer='he_normal',
                                   trainable=True,
                                   regularizer=regularizers.l2(1e-4))
    self.W_Q = self.add_weight(name='W_Q',
                               shape=(self.Nh, self.hidden_dim // self.Nh,
                                      input_dim),
                               initializer='he_normal',
                               trainable=True,
                               regularizer=regularizers.l2(1e-4))
    self.Rel_W = self.add_weight(
        name='Rel_W',
        shape=(self.Nh, 1, self.k_size, (self.hidden_dim // 2) // self.Nh),
        initializer=initializers.truncated_normal(),
        trainable=True,
        regularizer=regularizers.l2(1e-4))
    self.Rel_H = self.add_weight(
        name='Rel_H',
        shape=(self.Nh, self.k_size, 1, (self.hidden_dim // 2) // self.Nh),
        initializer=initializers.truncated_normal(),
        trainable=True,
        regularizer=regularizers.l2(1e-4))
    if self.m_for_stem is not None:
        self.emb_a = self.add_weight(name='emb_a',
                                     shape=(self.k_size, 1,
                                            self.hidden_dim // self.Nh),
                                     initializer='he_normal',
                                     trainable=True,
                                     regularizer=regularizers.l2(1e-4))
        self.emb_b = self.add_weight(name='emb_b',
                                     shape=(1, self.k_size,
                                            self.hidden_dim // self.Nh),
                                     initializer='he_normal',
                                     trainable=True,
                                     regularizer=regularizers.l2(1e-4))
        self.emb_mix = self.add_weight(name='emb_mix',
                                       shape=(self.m_for_stem,
                                              self.hidden_dim // self.Nh),
                                       initializer='he_normal',
                                       trainable=True,
                                       regularizer=regularizers.l2(1e-4))
    super(SelfAttention, self).build(input_shape)
# make sure the input data has the expected shape:
train_data = np.reshape(train_data, [-1, 48, 48, 1])
test_data = np.reshape(test_data, [-1, 48, 48, 1])

############################################################
# Model
############################################################
# training hyperparameter grids:
conv_layers = [[64, 64, 0, 128, 128, 0, 256, 256, 0, 512, 512, 0]]
kernel_size = [[3, 3, 0, 3, 3, 0, 3, 3, 0, 3, 3, 0]]
dense_layers = [[1024, 512]]
dp_layers = [0.5]
activa_fn = [['leaky_relu', 0.02], ['relu', 'relu']]
learn_rate = [0.001]
b_init = [['Constant', '0.01', Constant(0.01)]]
w_init = [['he_normal', 'he_normal', 'he_normal'],
          ['truncate_normal', 'M:0/S:0.02', truncated_normal(0, 0.02)]]
epochs = 100
validate_rate = 0.2
bt_size = [256]

datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# N-fold validation:
n_splits = round(1 / validate_rate)
nflist = utils.N_Fold_Validate(n_splits, train_data.shape[0])
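# `utils.N_Fold_Validate` is project-specific. A hedged equivalent for
# generating the fold indices with scikit-learn (an assumption, not the
# project's implementation):
import numpy as np
from sklearn.model_selection import KFold

def n_fold_indices(n_splits, n_samples):
    kf = KFold(n_splits=n_splits, shuffle=True)
    return list(kf.split(np.arange(n_samples)))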
def Conv(x, f_dim):
    return KL.Conv2D(filters=f_dim,
                     kernel_size=(5, 5),
                     strides=(2, 2),
                     padding='same',
                     kernel_initializer=KI.truncated_normal(
                         stddev=0.02))(x)
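# Hedged usage sketch of the helper above (assumes `import keras.layers
# as KL` and `import keras.initializers as KI`, as the aliases suggest):
x = KL.Input(shape=(64, 64, 3))
h = Conv(x, 64)    # -> (32, 32, 64)
h = Conv(h, 128)   # -> (16, 16, 128)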
pytest.param(
    initializers.RandomUniform(maxval=0.1),
    dict(class_name="random_uniform", minval=-0.05, maxval=0.1, seed=None),
    id="ru_0",
),
pytest.param(
    initializers.random_uniform(minval=-0.2, seed=42),
    dict(class_name="random_uniform", minval=-0.2, maxval=0.05, seed=42),
    id="ru_1",
),
pytest.param(
    initializers.TruncatedNormal(0.1),
    dict(class_name="truncated_normal", mean=0.1, stddev=0.05, seed=None),
    id="tn_0",
),
pytest.param(
    initializers.truncated_normal(mean=0.2, stddev=0.003, seed=42),
    dict(class_name="truncated_normal", mean=0.2, stddev=0.003, seed=42),
    id="tn_1",
),
pytest.param(
    initializers.Orthogonal(1.1),
    dict(class_name="orthogonal", gain=1.1, seed=None),
    id="o_0",
),
pytest.param(
    initializers.orthogonal(gain=1.2, seed=42),
    dict(class_name="orthogonal", gain=1.2, seed=42),
    id="o_1",
),
pytest.param(initializers.Identity(1.1),
             dict(class_name="identity", gain=1.1),
             id="i_0"),
pytest.param(initializers.identity(),
             dict(class_name="identity", gain=1.0),
             id="i_1"),
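# A hedged sketch of a test that could consume the params above
# (collected here in a hypothetical CASES list); the assertion assumes
# the flat dicts pair `class_name` with the keys of `get_config()`:
@pytest.mark.parametrize("initializer, expected", CASES)
def test_initializer_config(initializer, expected):
    actual = dict(initializer.get_config(),
                  class_name=expected["class_name"])
    assert actual == expected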
def train(trainjson, epoch, batch_size, netq, neta, netfull, activate,
          drop_out, modelname, reg_flag, normal_flag, optim):
    data = loadfromjson(trainjson)

    # get labels
    taglist = []
    for index, item in enumerate(data['datalist']):
        if item[0] == '0':
            taglist.append(0)
        elif item[0] == '1':
            taglist.append(1)
        else:
            print('ERROR\n')
            print(index)
            taglist.append(0)

    # get answer vectors and question vectors
    xq = np.zeros((len(data['vectorlist1']), netq[0], 60), dtype='float32')
    xa = np.zeros((len(data['vectorlist1']), neta[0], 60), dtype='float32')
    for index1, items in enumerate(data['vectorlist1']):
        for index2, item2 in enumerate(items):
            if index2 == netq[0]:
                break
            xq[index1][index2] = item2
    for index1, items in enumerate(data['vectorlist2']):
        for index2, item2 in enumerate(items):
            if index2 == neta[0]:
                break
            xa[index1][index2] = item2
    ya = np.array(taglist)

    # oversample the positive examples
    trueya = []
    truexa = []
    truexq = []
    for index, label in enumerate(taglist):
        if label == 1:
            trueya.append(label)
            truexa.append(xa[index])
            truexq.append(xq[index])
    assert len(trueya) != 0
    print(len(truexq))
    truexa = np.repeat(truexa, 2, axis=0)
    truexq = np.repeat(truexq, 2, axis=0)
    trueya = np.repeat(trueya, 2, axis=0)
    ya = np.concatenate((ya, np.array(trueya)))
    xa = np.concatenate((xa, np.array(truexa)))
    xq = np.concatenate((xq, np.array(truexq)))

    print('Build model...')
    # regularizer param
    if reg_flag == 'None':
        reg = None
    else:
        reg_rate = float(reg_flag.split('_')[1])
        if reg_flag.split('_')[0] == 'l1':
            reg = l1(reg_rate)
        else:
            reg = l2(reg_rate)

    if not os.path.isfile(modelname):
        # separate LSTMs for question and answers
        question_vector_input = Input(shape=(netq[0], 60),
                                      dtype="float32",
                                      name='question_vector_input')
        question_vector_mask = Masking(mask_value=0.0)(question_vector_input)
        question_features = LSTM(
            units=netq[1],
            kernel_initializer=initializers.truncated_normal(
                stddev=0.01))(question_vector_mask)
        answer_vector_input = Input(shape=(neta[0], 60),
                                    dtype="float32",
                                    name='answer_vector_input')
        answer_vector_mask = Masking(mask_value=0.0)(answer_vector_input)
        answer_features = LSTM(
            units=neta[1],
            kernel_initializer=initializers.truncated_normal(
                stddev=0.01))(answer_vector_mask)

        # merge the two LSTMs
        question_features = normalization.BatchNormalization()(
            question_features)
        answer_features = normalization.BatchNormalization()(answer_features)
        features = concatenate([answer_features, question_features])
        # features = Activation(activate)(features)

        # fully connected layers
        for dim in netfull:
            features = Dense(dim,
                             kernel_regularizer=reg,
                             use_bias=True,
                             kernel_initializer=initializers.truncated_normal(
                                 stddev=0.01))(features)
            # use batch normalization or not
            if normal_flag == 'true':
                features = normalization.BatchNormalization()(features)
            features = Activation(activate)(features)

        # dropout layer
        final_layer = Dropout(drop_out)(features)
        # sigmoid squashes the output to [0, 1]
        main_output = Dense(1, activation='sigmoid',
                            name='main_output')(final_layer)

        # finish model
        model = Model(inputs=[question_vector_input, answer_vector_input],
                      outputs=[main_output])
        opt = optim.split('_')[0]
        if opt == 'rmsprop':
            opt = RMSprop(float(optim.split('_')[1]))
        else:
            opt = Adam(float(optim.split('_')[1]))
        model.compile(optimizer=opt,
                      loss='binary_crossentropy',
                      metrics=['accuracy'])
    else:
        print('load previous model')
        model = load_model(modelname)

    best_score = 0
    best_epoch = 0
    if not os.path.isfile('../json_data/quicktest.json'):
        savetojson('../raw_data/dev.txt', '../json_data/quicktest.json', 8000)
    test_q, test_a = getvalid(netq[0], neta[0],
                              '../json_data/quicktest.json')
    quelist, answerlist, datalist = getdata('../raw_data/dev.txt', 8000)
    for i in range(epoch):
        # training takes several hours
        model.fit([xq, xa], [ya], batch_size=batch_size, epochs=1)
        cur_score = valid(model, test_q, test_a, quelist, answerlist)
        print('In epoch', i + 1, 'MRR', cur_score)
        if cur_score > best_score:
            best_score = cur_score
            best_epoch = i + 1
            model.save(modelname[:-3] + str(epoch) + '.h5')
    print('best epoch', best_epoch, 'best MRR', best_score)
    f = open('../result/' + modelname[9:-3] + '.txt', 'w')
    f.write('best epoch' + str(best_epoch) + 'best MRR' + str(best_score))
    f.close()