def __init__(self, ksize, filters, nblocks, dropout=False, batch_norms=False):
    """Discriminator: a stack of pooling ResBlocks, a Conv1D stem, global
    max-pooling, dropout and a single linear logit.

    Args:
        ksize: kernel size shared by the conv layers.
        filters: filter count shared by the conv layers.
        nblocks: number of ResBlocks to stack.
        dropout: stored flag; presumably consulted in call() — TODO confirm.
        batch_norms: forwarded to every ResBlock.
    """
    super(Discriminator, self).__init__()
    self.nblocks = nblocks
    self.ksize = ksize
    self.filters = filters
    self.dropout = dropout
    # Leaky, pooling residual blocks form the body of the discriminator.
    self.blocks = [
        ResBlock(ksize,
                 filters,
                 pooling=True,
                 noisy=False,
                 leaky=True,
                 batch_norms=batch_norms)
        for _ in range(nblocks)
    ]
    reg = tf.keras.regularizers.l2
    self.conv_0 = tf.keras.layers.Conv1D(
        filters,
        ksize,
        activation='linear',
        padding='same',
        kernel_initializer=TruncatedNormal(stddev=0.02),
        bias_initializer='zeros',
        kernel_regularizer=reg(0.0001),
        bias_regularizer=reg(0.00001))
    self.global_pooling = tf.keras.layers.GlobalMaxPooling1D()
    self.dropout_layer = tf.keras.layers.Dropout(0.5)
    # Single linear output unit (logit for real/fake).
    self.dense_final = tf.keras.layers.Dense(
        units=1,
        bias_initializer='zeros',
        kernel_initializer=TruncatedNormal(stddev=0.02),
        kernel_regularizer=reg(0.0001),
        bias_regularizer=reg(0.00001),
        activation='linear')
def build(self, input_shape):
    """Create the attention weights once the input shapes are known.

    Expects `input_shape` to be a list of two shapes:
    the sequence [N, T_k, d_model] and the key masks [N, key_seqlen].
    """
    if not isinstance(input_shape, list) or len(input_shape) != 2:
        raise ValueError(
            'A `SelfMultiHeadAttention` layer should be called on a list of 2 tensors'
        )
    if len(input_shape[0]) != 3 or len(input_shape[1]) != 2:
        raise ValueError(
            'input: [N, T_k, d_model], key masks: [N, key_seqlen]')
    embedding_size = int(input_shape[0][-1])
    # Default the projection width to the input embedding size.
    if self.num_units is None:
        self.num_units = embedding_size
    # One fused kernel producing Q, K and V together (hence the `* 3`).
    self.W = self.add_weight(name='Q_K_V',
                             shape=[embedding_size, self.num_units * 3],
                             dtype=tf.float32,
                             initializer=TruncatedNormal(seed=self.seed))
    # Final linear projection applied after head concatenation.
    self.W_output = self.add_weight(
        name='output_W',
        shape=[self.num_units, self.num_units],
        dtype=tf.float32,
        initializer=TruncatedNormal(seed=self.seed))
    # Sub-layers used by call(): scaled dot-product attention and a
    # softmax-weighted sum with optional causal masking (future_binding) —
    # both presumably defined elsewhere in this package.
    self.layer_norm = LayerNormalization()
    self.attention = DotAttention(scale=self.scale)
    self.softmax_weight_sum = SoftmaxWeightedSum(
        dropout_rate=self.dropout_rate,
        future_binding=self.future_binding,
        seed=self.seed)
    self.dropout = tf.keras.layers.Dropout(self.dropout_rate,
                                           seed=self.seed)
    self.seq_len_max = int(input_shape[0][1])
    # Be sure to call this somewhere!
    super(SelfMultiHeadAttention, self).build(input_shape)
def build(self, input_shape):
    """Create the three additive bias embeddings (session/position/item).

    With a single session the layer receives one 3-D shape
    (batch, seq_len, embed); with several it receives a list of shapes and
    the first one is used. The `.value` attribute access implies TF1-style
    Dimension objects — TODO confirm against the TF version in use.
    """
    # Create a trainable weight variable for this layer.
    if self.sess_max_count == 1:
        embed_size = input_shape[2].value
        seq_len_max = input_shape[1].value
    else:
        embed_size = input_shape[0][2].value
        seq_len_max = input_shape[0][1].value
    # Each bias broadcasts over the other two axes:
    # per-session (S, 1, 1), per-position (1, T, 1), per-dimension (1, 1, E).
    self.sess_bias_embedding = self.add_weight(
        'sess_bias_embedding',
        shape=(self.sess_max_count, 1, 1),
        initializer=TruncatedNormal(mean=0.0, stddev=0.0001, seed=self.seed))
    self.seq_bias_embedding = self.add_weight(
        'seq_bias_embedding',
        shape=(1, seq_len_max, 1),
        initializer=TruncatedNormal(mean=0.0, stddev=0.0001, seed=self.seed))
    self.item_bias_embedding = self.add_weight(
        'item_bias_embedding',
        shape=(1, 1, embed_size),
        initializer=TruncatedNormal(mean=0.0, stddev=0.0001, seed=self.seed))
    # Be sure to call this somewhere!
    super(BiasEncoding, self).build(input_shape)
def __init__(self, embed_dim, num_heads=8):
    """Multi-head self-attention layer.

    Args:
        embed_dim: total model width; must be divisible by `num_heads`.
        num_heads: number of attention heads.

    Raises:
        ValueError: if embed_dim is not divisible by num_heads.
    """
    super(MultiHeadSelfAttention, self).__init__()
    self.embed_dim = embed_dim
    self.num_heads = num_heads
    if embed_dim % num_heads != 0:
        raise ValueError(
            f"embedding dimension = {embed_dim} should be divisible by number of heads = {num_heads}"
        )
    self.projection_dim = embed_dim // num_heads

    def projection():
        # Bias-free Q/K/V projection with truncated-normal kernel init.
        return Dense(embed_dim,
                     kernel_initializer=TruncatedNormal(mean=0.,
                                                        stddev=TRUNC_STD),
                     use_bias=False)

    self.query_dense = projection()
    self.key_dense = projection()
    self.value_dense = projection()
    # Output projection after head concatenation keeps a (zero-init) bias.
    self.combine_heads = Dense(embed_dim,
                               kernel_initializer=TruncatedNormal(
                                   mean=0., stddev=TRUNC_STD),
                               bias_initializer=Zeros())
def get_embedding(region_num, region_feature_dim_dict, base_feature_dim_dict,
                  bias_feature_dim_dict, init_std, seed, l2_reg_linear):
    """Build the linear (dim=1) embedding tables for an MLR-style model.

    Args:
        region_num: number of regions; region/base tables are built per region.
        region_feature_dim_dict: dict with a 'sparse' list of region features.
        base_feature_dim_dict: dict with a 'sparse' list of base features.
        bias_feature_dim_dict: dict with a 'sparse' list of bias features.
        init_std: stddev of the truncated-normal embedding initializer.
        seed: base random seed; region/base tables offset it by region index.
        l2_reg_linear: L2 regularization strength on the embeddings.

    Returns:
        (region_embeddings, base_embeddings, bias_embedding) where the first
        two are lists (one per region) of per-feature Embedding layers and the
        last is a flat list of per-feature Embedding layers.
    """
    def _scalar_embedding(feat, layer_name, feat_seed):
        # One scalar weight per sparse feature value.
        return Embedding(feat.dimension,
                         1,
                         embeddings_initializer=TruncatedNormal(
                             stddev=init_std, seed=feat_seed),
                         embeddings_regularizer=l2(l2_reg_linear),
                         name=layer_name)

    region_embeddings = [
        [_scalar_embedding(feat, 'region_emb_' + str(j) + '_' + str(i), seed + j)
         for i, feat in enumerate(region_feature_dim_dict['sparse'])]
        for j in range(region_num)
    ]
    base_embeddings = [
        [_scalar_embedding(feat, 'base_emb_' + str(j) + '_' + str(i), seed + j)
         for i, feat in enumerate(base_feature_dim_dict['sparse'])]
        for j in range(region_num)
    ]
    bias_embedding = [
        _scalar_embedding(feat, 'embed_bias' + '_' + str(i), seed)
        for i, feat in enumerate(bias_feature_dim_dict['sparse'])
    ]
    return region_embeddings, base_embeddings, bias_embedding
def __init__(self, embed_dim, num_heads, ff_dim, dropout=0.1, prenorm=False,
             approximate_gelu=False):
    """Transformer encoder block: self-attention plus a GELU MLP, with two
    LayerNorms and two Dropouts (pre-/post-norm wiring chosen by `prenorm`).

    Args:
        embed_dim: model width.
        num_heads: attention heads.
        ff_dim: hidden width of the feed-forward MLP.
        dropout: dropout rate after attention and after the MLP.
        prenorm: stored flag; presumably selects the wiring in call().
        approximate_gelu: use the tanh approximation of GELU.
    """
    super(TransformerBlock, self).__init__()
    self.att = MultiHeadSelfAttention(embed_dim, num_heads)

    def dense(units):
        # Truncated-normal kernel, zero bias — the file-wide convention.
        return Dense(units,
                     kernel_initializer=TruncatedNormal(mean=0.,
                                                        stddev=TRUNC_STD),
                     bias_initializer=Zeros())

    # Position-wise feed-forward network: expand to ff_dim, GELU, project back.
    self.ffn = tf.keras.Sequential([
        dense(ff_dim),
        tfa.layers.GELU(approximate=approximate_gelu),
        dense(embed_dim),
    ])
    self.layernorm1 = LayerNormalization(epsilon=1e-6)
    self.layernorm2 = LayerNormalization(epsilon=1e-6)
    self.dropout1 = Dropout(dropout)
    self.dropout2 = Dropout(dropout)
    self.prenorm = prenorm
def __init__(self, id, depth, path_prob, tree):
    """A node in a soft decision tree.

    Args:
        id: string identifier used to name this node's variables.
        depth: depth of this node; a node at `tree.max_depth` is a leaf.
        path_prob: probability of reaching this node from the root.
        tree: owning tree; provides max_depth, n_classes and input_layer.
    """
    self.id = id
    self.depth = depth
    self.path_prob = path_prob
    self.is_leaf = self.depth == tree.max_depth
    self.left_child = None
    self.right_child = None
    # Placeholders populated later while wiring up the training graph.
    self.prob = None
    self.dense_scaled = None
    self.ema = None
    self.ema_apply_op = None
    self.ema_P = None
    self.ema_p = None
    self.alpha = None
    self.penalty = None
    self.leaf_loss = None
    if self.is_leaf:
        # Leaf: trainable per-class parameter row (1 x n_classes).
        self.phi = TrainableVar(name="phi_" + self.id,
                                shape=(1, tree.n_classes),
                                dtype="float32",
                                initializer=TruncatedNormal())(path_prob)
    else:
        # Inner node: scalar routing logit computed from the tree input.
        # NOTE(review): kernel uses RandomNormal while bias uses
        # TruncatedNormal — unusual pairing; confirm it is intentional.
        self.dense = Dense(units=1,
                           name="dense_" + self.id,
                           dtype="float32",
                           kernel_initializer=RandomNormal(),
                           bias_initializer=TruncatedNormal())(
                               tree.input_layer)
def __init__(self, ksize, filters, nblocks_signal, nblocks_weights,
             batch_norms=True):
    """Two-headed deconvolution head: one ResBlock stack plus a 1-filter
    Conv1D for the signal, and one ResBlock stack plus a Dense(1) for the
    per-step weights.

    Args:
        ksize: kernel size for the ResBlocks.
        filters: filter count for the ResBlocks.
        nblocks_signal: number of ResBlocks in the signal head.
        nblocks_weights: number of ResBlocks in the weights head.
        batch_norms: forwarded to every ResBlock.
    """
    super(DeconvHead2, self).__init__()
    self.nblocks_signal = nblocks_signal
    self.nblocks_weights = nblocks_weights
    self.ksize = ksize
    self.filters = filters
    self.blocks_signal = [
        ResBlock(ksize, filters, batch_norms=batch_norms)
        for _ in range(nblocks_signal)
    ]
    self.blocks_weights = [
        ResBlock(ksize, filters, batch_norms=batch_norms)
        for _ in range(nblocks_weights)
    ]
    reg = tf.keras.regularizers.l2
    # Signal head output: a single-filter 1x1 convolution.
    self.conv_final_signal = tf.keras.layers.Conv1D(
        1,
        1,
        activation='linear',
        padding='same',
        kernel_initializer=TruncatedNormal(stddev=0.02),
        bias_initializer='zeros',
        kernel_regularizer=reg(0.0001),
        bias_regularizer=reg(0.00001),
        activity_regularizer=reg(0.01))
    # Weights head output: a single linear unit.
    self.dense_final_weights = tf.keras.layers.Dense(
        units=1,
        bias_initializer='zeros',
        kernel_initializer=TruncatedNormal(stddev=0.02),
        kernel_regularizer=reg(0.0001),
        activity_regularizer=reg(0.01),
        activation='linear')
def __encoder(self, input, num, name='Encoder', training=None):
    """
    Treat batch_size dimension and num dimension as one batch_size dimension
    (batch_size * num).
    :param input: <batch_size, num, time_step, input_dim>
    :param num: the number of input time series data. For short term data, the num is 1.
    :param name: prefix used to name the internal Lambda layers.
    :param training: Keras training flag, forwarded to the dropout layers.
    :return: the embedded of the input <batch_size, num, last_rnn_hid_size>
    """
    # Width of the CNN output along time ("valid" convolution over time_step).
    Tc = self.time_step - self.cnn_height + 1

    # CNN: fold (batch, num) into one axis so a single Conv2D processes every
    # series; reshaped input: (batch_size * num, time_step, feature_num, 1).
    # (The previously computed `batch_size_new = self.batch_size * num` was
    # never used — the reshapes infer the batch axis with -1 — so it was
    # removed.)
    reshaped_input = Lambda(lambda x: K.reshape(
        x, (-1, self.time_step, self.feature_num, 1),
    ), name=name + 'reshape_cnn')(input)

    # output: <batch_size * num, Tc, 1, en_conv_hidden_size>
    cnn_out = Conv2D(filters=self.cnn_hid_size,
                     kernel_size=(self.cnn_height, self.feature_num),
                     padding="valid",
                     kernel_initializer=TruncatedNormal(stddev=0.1),
                     bias_initializer=Constant(0.1),
                     activation="relu")(reshaped_input)
    cnn_out = Dropout(self.cnn_dropout)(cnn_out, training=training)

    # Split (batch, num) apart again for the per-series attention RNN.
    rnn_input = Lambda(
        lambda x: K.reshape(x, (-1, num, Tc, self.cnn_hid_size)),
    )(cnn_out)

    # Stacked GRU cells wrapped with attention (AttentionRNNWrapper); the
    # wrapper is shared, so all series reuse the same weights.
    rnn_cells = [
        GRUCell(h_size, activation="relu", dropout=self.rnn_dropout)
        for h_size in self.rnn_hid_sizes
    ]
    attention_rnn = AttentionRNNWrapper(
        RNN(rnn_cells), weight_initializer=TruncatedNormal(stddev=0.1))

    outputs = []
    for i in range(num):
        input_i = rnn_input[:, i]
        # input_i = (batch, conv_hid_size, Tc)
        input_i = Permute((2, 1),
                          input_shape=[Tc, self.cnn_hid_size])(input_i)
        # output = (batch, last_rnn_hid_size)
        output_i = attention_rnn(input_i, training=training)
        # output = (batch, 1, last_rnn_hid_size)
        output_i = Reshape((1, -1))(output_i)
        outputs.append(output_i)

    # Concatenate the per-series embeddings along the num axis.
    if len(outputs) > 1:
        output = Lambda(lambda x: concatenate(x, axis=1))(outputs)
    else:
        output = outputs[0]
    return output
def create_model(n_input, n_output):
    """Build and compile a small LSTM classifier.

    Architecture: LSTM(128) -> Dropout(0.5) -> Dense(32, sigmoid) ->
    Dense(n_output, softmax), trained with Adam on categorical crossentropy.

    Args:
        n_input: length of the input sequence (one feature per step).
        n_output: number of output classes; also sets the init scale.

    Returns:
        the compiled Keras model.
    """
    def init():
        # Fresh initializer per layer, scaled by 1/sqrt(n_output).
        return TruncatedNormal(stddev=1. / np.sqrt(n_output))

    model = Sequential()
    model.add(LSTM(128,
                   input_shape=(n_input, 1),
                   return_sequences=False,
                   kernel_initializer=init()))
    model.add(Dropout(0.5))
    model.add(Dense(32, activation='sigmoid', kernel_initializer=init()))
    model.add(Dense(n_output, activation='softmax', kernel_initializer=init()))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
def baseline_model():
    """Small softmax MLP baseline: nf -> 10 -> 7 -> nc.

    Relies on module-level `nf` (feature count) and `nc` (class count).
    Hidden layers use a uniform init with bound sqrt(6 / fan_in); the output
    layer uses a truncated normal with stddev sqrt(2 / (fan_in + fan_out)).
    Seed 42 everywhere keeps runs repeatable.

    Returns:
        the compiled Keras model.
    """
    model = Sequential()
    # Hidden layer 1.
    bound1 = np.sqrt(6 / nf)
    model.add(Dense(10,
                    input_dim=nf,
                    kernel_initializer=RandomUniform(minval=-bound1,
                                                     maxval=bound1,
                                                     seed=42)))
    model.add(Activation('relu'))
    # Hidden layer 2: fan_in is the 10 units above.
    bound2 = np.sqrt(6 / 10)
    model.add(Dense(7,
                    kernel_initializer=RandomUniform(minval=-bound2,
                                                     maxval=bound2,
                                                     seed=42)))
    model.add(Activation('relu'))
    # Output layer.
    model.add(Dense(nc,
                    kernel_initializer=TruncatedNormal(mean=0.0,
                                                       stddev=np.sqrt(2 / (nc + 7)),
                                                       seed=42)))
    model.add(Activation('softmax'))
    # Compile model
    model.compile(loss='categorical_crossentropy',
                  optimizer=Adam(epsilon=1e-8),
                  metrics=['accuracy'])
    return model
def build_model(self, intents_count):
    """Assemble a multi-label intent classifier on top of a BERT encoder.

    TF Keras documentation:
    https://www.tensorflow.org/api_docs/python/tf/keras/Model

    Args:
        intents_count: number of intent labels (output units).

    Returns:
        a tf.keras Model mapping the three BERT inputs to per-intent
        sigmoid scores under the 'type' output key.
    """
    # The transformers model exposes its MainLayer as layer 0.
    bert = self.transformer_model.layers[0]

    # Standard BERT inputs, each (input_sentence_length,) int32.
    seq_len = self.input_sentence_length
    input_ids = Input(shape=(seq_len,), name='input_ids', dtype='int32')
    token_ids = Input(shape=(seq_len,), name='token_type_ids', dtype='int32')
    attention_masks = Input(shape=(seq_len,), name='attention_mask',
                            dtype='int32')
    inputs = {
        'input_ids': input_ids,
        'token_type_ids': token_ids,
        'attention_mask': attention_masks,
    }

    # Index [1] is the pooled ([CLS]) output of the BERT layer.
    pooled = bert(inputs)[1]
    pooled = Dropout(self.bert_config.hidden_dropout_prob,
                     name='pooled_output')(pooled)

    # Sigmoid head: one independent probability per intent (multi-label).
    scores = Dense(units=intents_count,
                   kernel_initializer=TruncatedNormal(
                       stddev=self.bert_config.initializer_range),
                   name='question_type',
                   activation='sigmoid')(pooled)
    outputs = {'type': scores}

    model = Model(inputs=inputs, outputs=outputs,
                  name='QuestionType_BERT_MultiLabel')
    # Take a look at the model
    model.summary()
    return model
def build_top_nn(input_shape, summary=False):
    """Return the custom fully connected classifier.

    A small binary head: Flatten -> Dense(16) -> ReLU -> Dropout(0.5) ->
    Dense(1) -> sigmoid, compiled with Adamax and binary crossentropy.
    (The original docstring opened with four quotes — fixed.)

    :param input_shape: shape of the input feature tensor fed to Flatten.
    :param summary: if True, print the model summary.
    :return: the compiled Sequential model.
    """
    w = TruncatedNormal(mean=0.0, stddev=0.0001, seed=None)
    # NOTE(review): epsilon=None / decay=0.0 are legacy Keras optimizer
    # arguments — confirm the installed Keras version accepts them.
    opt = Adamax(learning_rate=0.0001,
                 beta_1=0.9,
                 beta_2=0.999,
                 epsilon=None,
                 decay=0.0)
    model_top = Sequential()
    model_top.add(Flatten(input_shape=input_shape))
    model_top.add(Dense(16, kernel_initializer=w, bias_initializer='zeros'))
    model_top.add(Activation('relu'))
    model_top.add(Dropout(0.5))
    model_top.add(Dense(1, kernel_initializer=w, bias_initializer='zeros'))
    model_top.add(Activation('sigmoid'))
    if summary:
        print("Top classifier:")
        model_top.summary()
    model_top.compile(optimizer=opt,
                      loss='binary_crossentropy',
                      metrics=['accuracy'])
    return model_top
def train(train_generator):
    """Fine-tune a VGG16-based classifier and return the training history.

    Builds a VGG16 backbone (ImageNet weights, no top), adds a GAP + Dense
    head sized by the module-level `classes_num`, freezes the first 15
    layers, compiles with SGD, and trains either on module-level
    `x_data`/`y_data` (when `test_flag` is set) or on `train_generator`.

    Args:
        train_generator: generator yielding (images, labels) batches.

    Returns:
        the Keras History object from model.fit (previously it was
        assigned to `hist` but never returned — fixed).
    """
    # Prepare the Keras model; the trained model is published via the
    # module-level `model` global.
    global model
    model = VGG16(weights='imagenet',
                  include_top=False,
                  input_tensor=None,
                  input_shape=(224, 224, 3))
    # Rebuild the fully connected (FC) classification head.
    x = model.output
    x = GlobalAveragePooling2D(data_format="channels_last")(x)
    x = Dense(1024, activation='relu',
              kernel_initializer=TruncatedNormal(seed=0))(x)
    prediction = Dense(classes_num, activation='softmax')(x)
    model = Model(inputs=model.input, outputs=prediction)
    # Freeze the first 14 VGG16 layers so they are not retrained.
    for layer in model.layers[:15]:
        layer.trainable = False
    model.compile(optimizer=SGD(),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    if test_flag:
        hist = model.fit(x=x_data, y=y_data, epochs=epochs, verbose=1)
    else:
        hist = model.fit(train_generator, epochs=epochs, verbose=1)
    return hist
def build_model(self,
                pretrained_model_path=None,
                pretrained_model_tag="bert",
                pos_weight=1.,
                bilstm_dim_list=[],
                transformer_kwargs={}):
    """Build the sentence-classification model.

    Encodes the input sequence, takes position 0 (the [CLS] token) as the
    sentence embedding and classifies it with a dense layer — sigmoid per
    label when self.multi_label, softmax otherwise.

    Args:
        pretrained_model_path: path to the pretrained encoder weights.
        pretrained_model_tag: encoder type, e.g. "bert".
        pos_weight: exponent applied to the sigmoid outputs in the
            multi-label case (output ** pos_weight).
        bilstm_dim_list: dimensions of optional BiLSTM layers in the encoder.
        transformer_kwargs: extra kwargs for the transformer encoder.

    Returns:
        the built tf.keras Model (also stored on self.nn_model).
    """
    with self.get_scope():
        encoder_model = get_sequence_encoder_model(
            vocab_size=self.vocab_size,
            pretrained_model_path=pretrained_model_path,
            pretrained_model_tag=pretrained_model_tag,
            bilstm_dim_list=bilstm_dim_list,
            transformer_kwargs=transformer_kwargs)
        sequence_embedding = encoder_model.output
        # Sentence vector = encoder output at position 0 ([CLS]).
        class_embedding = Lambda(function=lambda x: x[:, 0],
                                 name="cls_layer")(sequence_embedding)
        classify_activation = sigmoid if self.multi_label else softmax
        classifier_layer = Dense(
            self.label_num,
            name="classifier",
            activation=classify_activation,
            kernel_initializer=TruncatedNormal(stddev=0.02))
        output = classifier_layer(class_embedding)
        if self.multi_label:
            # Raise each probability to pos_weight.
            output = Lambda(lambda x: x**pos_weight,
                            name="pos_weight_layer")(output)
        self.nn_model = Model(inputs=encoder_model.inputs,
                              outputs=[output],
                              name="nn_model")
        logger.info("nn model's summary:")
        self.nn_model.summary(print_fn=logger.info)
        self._update_model_dict("test", self.nn_model)
        return self.nn_model
def build_classifier(classes, bert_h5=None):
    """Build a text classifier on top of a saved BERT encoder.

    Takes the [CLS] position of the encoder output and adds a sigmoid unit
    for binary problems or a softmax layer otherwise. Defaults to a Chinese
    RoBERTa checkpoint when the module-level `language` is 'cn'.

    Args:
        classes: number of target classes (2 selects the sigmoid head).
        bert_h5: optional path to the saved encoder; defaulted by language.

    Returns:
        a Model whose `bert_encoder` attribute holds the underlying encoder.
    """
    if bert_h5 is None:
        bert_h5 = ('../tfhub/chinese_roberta_wwm_ext.h5'
                   if language == 'cn' else '../tfhub/bert_uncased.h5')
    bert = load_model(bert_h5)
    cls_vector = Lambda(lambda x: x[:, 0], name='CLS-token')(bert.output)
    if classes == 2:
        head = Dense(1, activation='sigmoid',
                     kernel_initializer=TruncatedNormal(stddev=0.02))
    else:
        head = Dense(classes, activation='softmax',
                     kernel_initializer=TruncatedNormal(stddev=0.02))
    output = head(cls_vector)
    model = Model(bert.input, output)
    model.bert_encoder = bert
    return model
def __init__(self, kind: str, n_units, n_layers=1,
             # Its not obvious how to compute fan_in/fan_out for these models
             # so we recommend avoiding glorot initialization for now
             w_init=TruncatedNormal(stddev=0.05),
             recurrent_init=None,
             bidirectional=True,
             learn_initial_states: bool=False,
             lstm_bias=1,
             keep_recurrent: float=1):
    """Specification of a recurrent (GRU/LSTM) layer.

    Args:
        kind: either "GRU" or "LSTM".
        n_units: hidden units per direction.
        n_layers: number of stacked layers.
        w_init: kernel initializer. NOTE(review): the default is a single
            shared initializer instance across all instances — confirm
            that is intended.
        recurrent_init: initializer for the recurrent weights, or None.
        bidirectional: run the RNN in both directions.
        learn_initial_states: whether initial states are trainable.
        lstm_bias: value used for the LSTM bias (presumably the forget
            gate) — confirm against the layer implementation.
        keep_recurrent: keep-probability for recurrent-weight dropout.

    Raises:
        ValueError: if a required argument is None or `kind` is unknown.
    """
    # Fail fast with informative messages (these previously raised bare
    # ValueError() with no explanation, which made misuse hard to debug).
    if bidirectional is None or n_layers is None or n_units is None:
        raise ValueError(
            "n_units, n_layers and bidirectional must not be None "
            f"(got n_units={n_units}, n_layers={n_layers}, "
            f"bidirectional={bidirectional})")
    if kind not in ["GRU", "LSTM"]:
        raise ValueError(f"kind must be 'GRU' or 'LSTM', got {kind!r}")
    self._kind = kind
    self.keep_recurrent = keep_recurrent
    self.lstm_bias = lstm_bias
    self.n_units = n_units
    self.n_layers = n_layers
    self.bidirectional = bidirectional
    self.w_init = w_init
    self.recurrent_init = recurrent_init
    self.learn_initial_states = learn_initial_states
def build_model(num_features: int, num_targets: int) -> Sequential:
    """Build a plain MLP classifier: num_features -> 500 -> 250 -> 100 ->
    num_targets (softmax). All layers share one truncated-normal kernel
    initializer and a zero bias initializer; prints a summary before
    returning.

    Args:
        num_features: input feature count.
        num_targets: number of output classes.

    Returns:
        the (uncompiled) Sequential model.
    """
    kernel_init = TruncatedNormal(mean=0.0, stddev=0.01)
    bias_init = Constant(value=0.0)
    model = Sequential()
    model.add(Dense(units=500,
                    kernel_initializer=kernel_init,
                    bias_initializer=bias_init,
                    input_shape=(num_features, )))
    model.add(Activation("relu"))
    # Two more hidden relu layers of decreasing width.
    for width in (250, 100):
        model.add(Dense(units=width,
                        kernel_initializer=kernel_init,
                        bias_initializer=bias_init))
        model.add(Activation("relu"))
    model.add(Dense(units=num_targets,
                    kernel_initializer=kernel_init,
                    bias_initializer=bias_init))
    model.add(Activation("softmax"))
    model.summary()
    return model
def build(self,
          hiddens=[16],
          activations=['relu'],
          dropout=0.5,
          weight_decay=5e-4,
          use_bias=False,
          lr=0.01,
          p1=1.4,
          p2=0.7):
    """Build a GCN classifier trained with virtual-adversarial and entropy
    regularization (TensorFlow backend only).

    Args:
        hiddens: hidden-layer sizes of the GCN stack.
        activations: activation per hidden layer (zipped with `hiddens`).
        dropout: rate of the shared Dropout layer used by forward().
        weight_decay: L2 factor on hidden GraphConvolution kernels.
        use_bias: whether GraphConvolution layers use a bias.
        lr: learning rate; the adversarial optimizer uses lr / 10.
        p1: weight of the virtual adversarial loss.
        p2: weight of the entropy loss.

    Raises:
        RuntimeError: when running under the torch backend.
    """
    if self.backend == "torch":
        raise RuntimeError(
            f"Currently {self.name} only supports for tensorflow backend.")
    with tf.device(self.device):
        # Symbolic inputs: dense node attributes, sparse adjacency, and the
        # indices of the nodes to gather predictions for.
        x = Input(batch_shape=[None, self.graph.num_node_attrs],
                  dtype=self.floatx,
                  name='node_attr')
        adj = Input(batch_shape=[None, None],
                    dtype=self.floatx,
                    sparse=True,
                    name='adj_matrix')
        index = Input(batch_shape=[None], dtype=self.intx,
                      name='node_index')
        GCN_layers = []
        for hidden, activation in zip(hiddens, activations):
            GCN_layers.append(
                GraphConvolution(
                    hidden,
                    activation=activation,
                    use_bias=use_bias,
                    kernel_regularizer=regularizers.l2(weight_decay)))
        # Output layer: one logit per class, no regularizer.
        GCN_layers.append(
            GraphConvolution(self.graph.num_node_classes,
                             use_bias=use_bias))
        self.GCN_layers = GCN_layers
        self.dropout = Dropout(rate=dropout)
        logit = self.forward(x, adj)
        output = Gather()([logit, index])
        model = TFKeras(inputs=[x, adj, index], outputs=output)
        model.compile(loss=SparseCategoricalCrossentropy(from_logits=True),
                      optimizer=Adam(lr=lr),
                      metrics=['accuracy'])
        # Trainable adversarial perturbation over all node attributes.
        self.r_vadv = tf.Variable(TruncatedNormal(stddev=0.01)(
            shape=[self.graph.num_nodes, self.graph.num_node_attrs]),
            name="r_vadv")
        entropy_loss = entropy_y_x(logit)
        vat_loss = self.virtual_adversarial_loss(x, adj, logit)
        # Auxiliary loss: p1 * VAT + p2 * entropy.
        model.add_loss(p1 * vat_loss + p2 * entropy_loss)
        self.model = model
        self.adv_optimizer = Adam(lr=lr / 10)
def fit(self, X, y):
    """Fit the model parameters to (X, y) by running `self.iters` steps of
    `self.fit_single_step()`.

    Args:
        X: 2-D feature matrix of shape (n_samples, n_features).
        y: target values aligned with the rows of X.

    Raises:
        ValueError: if X is not 2-dimensional or X and y differ in length.
    """
    # Validate with explicit raises — `assert` is stripped under `python -O`,
    # which is what the original code used for input validation.
    if len(X.shape) != 2:
        raise ValueError("X must be 2-dimensional, got shape %r" % (X.shape,))
    if len(X) != len(y):
        raise ValueError("X and y must have the same length (%d != %d)"
                         % (len(X), len(y)))
    self.X = tf.convert_to_tensor(X, tf.float32)
    self.nparams = X.shape[1]
    if self.fit_intercept:
        # Append a constant-1 column so the extra parameter acts as a bias.
        ones = tf.constant(1, shape=(len(X), 1), dtype=tf.float32)
        self.params = tf.Variable(
            TruncatedNormal()(shape=(self.nparams + 1, 1)), dtype=tf.float32)
        self.X = tf.concat([self.X, ones], axis=-1)
    else:
        self.params = tf.Variable(
            TruncatedNormal()(shape=(self.nparams, 1)), dtype=tf.float32)
    self.y = y
    for _ in range(self.iters):
        self.fit_single_step()
def __init__(
    self,
    num_patches,
    num_layers,
    num_classes,
    d_model,
    num_heads,
    mlp_dim,
    channels=3,
    dropout=0.1,
    prenorm=False,
    distill_token=False,
    approximate_gelu=False,
):
    """Keyword-spotting Transformer: learned positional + class (and
    optionally distillation) embeddings, a patch projection, and a stack of
    TransformerBlock encoders.

    Args:
        num_patches: number of input patches (excluding special tokens).
        num_layers: number of encoder blocks.
        num_classes: stored by callers/heads; unused here.
        d_model: model width.
        num_heads: attention heads per block.
        mlp_dim: hidden width of each block's MLP.
        channels: input channels; unused here.
        dropout: dropout rate inside each block.
        prenorm: pre-norm wiring flag forwarded to the blocks.
        distill_token: reserve an extra token slot and embedding.
        approximate_gelu: use the tanh GELU approximation in the blocks.
    """
    super(KWSTransformer, self).__init__()
    self.d_model = d_model
    self.num_layers = num_layers

    def trunc_init():
        return TruncatedNormal(mean=0., stddev=TRUNC_STD)

    # One slot for the class token, plus one more when distilling.
    additional_tokens = 2 if distill_token else 1
    self.pos_emb = self.add_weight(
        "pos_emb",
        shape=(1, num_patches + additional_tokens, d_model),
        initializer=trunc_init())
    self.class_emb = self.add_weight(
        "class_emb", shape=(1, 1, d_model), initializer=trunc_init())
    if distill_token:
        self.distill_emb = self.add_weight(
            "distill_emb", shape=(1, 1, d_model), initializer=trunc_init())
    else:
        self.distill_emb = None
    self.patch_proj = Dense(
        d_model,
        kernel_initializer=trunc_init(),
        bias_initializer=Zeros(),
        input_shape=(
            98,
            40,
        ))
    self.enc_layers = [
        TransformerBlock(d_model, num_heads, mlp_dim, dropout, prenorm,
                         approximate_gelu) for _ in range(num_layers)
    ]
def build_model(channels: int,
                num_classes: int,
                layer_depth: int = 5,
                filters_root: int = 64,
                kernel_size: int = 3,
                pool_size: int = 2,
                dropout_rate: float = 0.5) -> Model:
    """
    Constructs a U-Net model

    :param channels: number of channels of the input tensors
    :param num_classes: number of classes
    :param layer_depth: total depth of unet
    :param filters_root: number of filters in top unet layer
    :param kernel_size: size of convolutional layers
    :param pool_size: size of maxpool layers
    :param dropout_rate: rate of dropout (annotation fixed: float, not int)

    :return: A TF Keras model
    """
    inputs = Input(shape=(None, None, channels))
    x = inputs
    # Keep each contracting level's output for the skip connections.
    down_layers = {}
    with tf.name_scope("contracting"):
        for layer_idx in range(0, layer_depth - 1):
            with tf.name_scope(f"contracting_{layer_idx}"):
                x = conv_block(layer_idx, filters_root, kernel_size,
                               dropout_rate)(x)
                down_layers[layer_idx] = x
                x = layers.MaxPooling2D((pool_size, pool_size))(x)
    with tf.name_scope("bottom"):
        # layer_idx still holds layer_depth - 2 after the loop above.
        x = conv_block(layer_idx + 1, filters_root, kernel_size,
                       dropout_rate)(x)
    with tf.name_scope("expanding"):
        for layer_idx in range(layer_depth - 2, -1, -1):
            with tf.name_scope(f"expanding_{layer_idx}"):
                x = upconv_block(layer_idx, filters_root, kernel_size,
                                 pool_size)(x)
                x = crop_concat_block()(x, down_layers[layer_idx])
                x = conv_block(layer_idx, filters_root, kernel_size,
                               dropout_rate)(x)
    # He-style stddev for the final 1x1 classification convolution.
    stddev = np.sqrt(2 / (kernel_size**2 * filters_root * 2))
    x = layers.Conv2D(filters=num_classes,
                      kernel_size=(1, 1),
                      kernel_initializer=TruncatedNormal(stddev=stddev),
                      strides=1,
                      padding="valid")(x)
    # NOTE(review): relu immediately before softmax is unusual — confirm
    # it is intentional.
    x = layers.Activation("relu")(x)
    outputs = layers.Activation("softmax", name="outputs")(x)
    model = Model(inputs, outputs, name="unet")
    return model
def create_model():
    """Build a small CNN binary classifier for 40x32 single-channel inputs.

    Two conv stages (the first followed by dropout, frequency-axis pooling
    and batch norm), then a dense funnel 32 -> 128 -> 1 with a sigmoid
    output. Prints the model summary before returning.

    Returns:
        the (uncompiled) Sequential model.
    """
    model = Sequential()
    # Conv stage 1: wide time-frequency kernel.
    model.add(Convolution2D(input_shape=[40, 32, 1],
                            filters=64,
                            kernel_size=[20, 8],
                            strides=[1, 1],
                            padding='same',
                            kernel_initializer=TruncatedNormal(),
                            activation='relu'))
    model.add(Dropout(rate=0.5))
    model.add(MaxPooling2D(pool_size=[1, 3], strides=[1, 3], padding='same'))
    model.add(BatchNormalization())
    # Conv stage 2.
    model.add(Convolution2D(filters=64,
                            kernel_size=[10, 4],
                            strides=[1, 1],
                            padding='same',
                            kernel_initializer=TruncatedNormal(stddev=0.01),
                            activation='relu'))
    model.add(Flatten())
    # Dense head: 32 -> 128 -> 1 (sigmoid), all with the same kernel init.
    for width, act in ((32, 'relu'), (128, 'relu'), (1, 'sigmoid')):
        model.add(Dense(width,
                        activation=act,
                        kernel_initializer=TruncatedNormal(stddev=0.01)))
    print(model.summary())
    return model
def __init__(self, n_units,
             n_layers=1,
             lstm_bias=1,
             w_init=TruncatedNormal(stddev=0.05),
             recurrent_init=None,
             bidirectional=True,
             learn_initial_states=False):
    # Thin wrapper that fixes kind="LSTM". Note the positional order the
    # base class expects: (kind, n_units, n_layers, w_init, recurrent_init,
    # bidirectional, learn_initial_states, lstm_bias) — lstm_bias goes last.
    # NOTE(review): the w_init default is a single shared initializer
    # instance across all instances of this class — confirm intended.
    super().__init__("LSTM", n_units, n_layers, w_init, recurrent_init,
                     bidirectional, learn_initial_states, lstm_bias)
def build(self,
          hiddens=[16],
          activations=['relu'],
          dropout=0.5,
          l2_norm=5e-4,
          use_bias=False,
          lr=0.01,
          p1=1.4,
          p2=0.7):
    """Build a GCN classifier with virtual-adversarial and entropy losses.

    Args:
        hiddens: hidden-layer sizes of the GCN stack.
        activations: activation per hidden layer (zipped with `hiddens`).
        dropout: rate of the per-hidden-layer Dropout layers.
        l2_norm: L2 factor on hidden GraphConvolution kernels.
        use_bias: whether GraphConvolution layers use a bias.
        lr: learning rate; the adversarial optimizer uses lr / 10.
        p1: weight of the virtual adversarial loss.
        p2: weight of the entropy loss.
    """
    with tf.device(self.device):
        # Symbolic inputs: dense node attributes, sparse adjacency, and the
        # indices of the nodes to gather predictions for.
        x = Input(batch_shape=[None, self.graph.n_attrs],
                  dtype=self.floatx,
                  name='attr_matrix')
        adj = Input(batch_shape=[None, None],
                    dtype=self.floatx,
                    sparse=True,
                    name='adj_matrix')
        index = Input(batch_shape=[None], dtype=self.intx,
                      name='node_index')
        GCN_layers = []
        dropout_layers = []
        # One dropout layer per hidden GCN layer.
        for hidden, activation in zip(hiddens, activations):
            GCN_layers.append(
                GraphConvolution(
                    hidden,
                    activation=activation,
                    use_bias=use_bias,
                    kernel_regularizer=regularizers.l2(l2_norm)))
            dropout_layers.append(Dropout(rate=dropout))
        # Output layer: one logit per class, no regularizer.
        GCN_layers.append(
            GraphConvolution(self.graph.n_classes, use_bias=use_bias))
        self.GCN_layers = GCN_layers
        self.dropout_layers = dropout_layers
        logit = self.forward(x, adj)
        output = Gather()([logit, index])
        model = Model(inputs=[x, adj, index], outputs=output)
        model.compile(loss=SparseCategoricalCrossentropy(from_logits=True),
                      optimizer=Adam(lr=lr),
                      metrics=['accuracy'])
        # Trainable adversarial perturbation over all node attributes.
        self.r_vadv = tf.Variable(TruncatedNormal(stddev=0.01)(
            shape=[self.graph.n_nodes, self.graph.n_attrs]),
            name="r_vadv")
        entropy_loss = entropy_y_x(logit)
        vat_loss = self.virtual_adversarial_loss(x, adj, logit)
        # Auxiliary loss: p1 * VAT + p2 * entropy.
        model.add_loss(p1 * vat_loss + p2 * entropy_loss)
        self.model = model
        self.adv_optimizer = Adam(lr=lr / 10)
def build_model(self,
                pretrained_model_path=None,
                pretrained_model_tag="bert",
                bilstm_dim_list=[],
                use_crf=False,
                crf_lr_multiplier=100,
                pos_weight=1,
                **kwargs):
    """
    Build the token-classification model. (Docstring translated from Chinese.)

    Args:
        pretrained_model_path: path of the pretrained model.
        pretrained_model_tag: type of the pretrained model, e.g. bert/...
        dense_dim_list: dimensions of fully-connected layers applied after
            sequence encoding (relu by default); an empty list adds none.
            (Documented here but passed via **kwargs — TODO confirm.)
        hidden_dropout_prob: dropout probability applied after sequence
            encoding to reduce overfitting. (Passed via **kwargs — TODO
            confirm.)
        bilstm_dim_list: dimensions of BiLSTM layers inserted during
            sequence encoding, one entry per layer.
        use_crf: whether to add a CRF layer.
        crf_lr_multiplier: learning-rate multiplier of the CRF layer, see
            https://kexue.fm/archives/7196
        pos_weight: weight of positive examples.
        **kwargs: forwarded to get_sequence_encoder_model.
    Returns:
        the NN model.
    """
    with self.get_scope():
        encoder_model = get_sequence_encoder_model(
            vocab_size=self.vocab_size,
            pretrained_model_path=pretrained_model_path,
            pretrained_model_tag=pretrained_model_tag,
            bilstm_dim_list=bilstm_dim_list,
            **kwargs)
        sequence_embedding = encoder_model.output
        classify_activation = sigmoid if self.multi_label else softmax
        # Per-token classifier over the encoded sequence.
        classifier_layer = Dense(
            self.label_num,
            name="token_classifier",
            activation=classify_activation,
            kernel_initializer=TruncatedNormal(stddev=0.02))
        prob_vec_output = classifier_layer(sequence_embedding)
        if use_crf:
            # CRF decodes label transitions; boosted learning rate.
            classifier_layer = CRF(lr_multiplier=crf_lr_multiplier,
                                   name="crf_layer")
            prob_vec_output = classifier_layer(prob_vec_output)
        if self.multi_label:
            # Raise each probability to pos_weight.
            prob_vec_output = Lambda(
                lambda x: x**pos_weight,
                name="pos_weight_layer")(prob_vec_output)
        self.nn_model = Model(inputs=encoder_model.inputs,
                              outputs=[prob_vec_output],
                              name="token_classify_model")
        logger.info("nn model's summary:")
        self.nn_model.summary(print_fn=logger.info)
        self._update_model_dict("test", self.nn_model)
        return self.nn_model
def get_initializer(init_name='truncate_norm', init_stddev=0.05, seed=1024):
    """Map an initializer name to a Keras initializer instance.

    Args:
        init_name: one of the supported initializer names below.
        init_stddev: stddev for the truncated-normal variants.
        seed: random seed for the seeded initializers.

    Returns:
        a Keras initializer instance.

    Raises:
        ValueError: if `init_name` is not recognized.
    """
    if init_name in ('truncate_norm', 'truncate_normal'):
        return TruncatedNormal(stddev=init_stddev, seed=seed)
    elif init_name in ('glorot_norm', 'glorot_normal', 'xavier_norm',
                       'xavier_normal'):
        return glorot_normal(seed=seed)
    elif init_name in ('he_norm', 'he_normal'):
        return he_normal(seed)
    elif init_name in ('trucate_uniform', 'truncate_uniform'):
        # BUG FIX: the next three branches used `in ('name')`, which is a
        # substring test on a plain string rather than tuple membership —
        # e.g. init_name='uniform' silently matched this branch. One-element
        # tuples restore exact matching; the historical misspelling
        # 'trucate_uniform' is kept for backward compatibility.
        return TruncatedNormal(stddev=init_stddev)
    elif init_name in ('glorot_uniform',):
        return glorot_uniform()
    elif init_name in ('he_uniform',):
        return he_uniform()
    elif init_name in ('zero', 'zeros'):
        return Zeros()
    elif init_name in ('ones', 'one'):
        return Ones()
    else:
        raise ValueError('not support {} initializer'.format(init_name))
def __init__(self, id, depth, pathprob, tree):
    """Node of a soft decision tree: leaves carry a trainable class
    parameter row, inner nodes a single-unit routing dense layer.

    Args:
        id: string identifier used to name this node's variables.
        depth: depth of this node; `tree.max_depth` marks a leaf.
        pathprob: probability of reaching this node from the root.
        tree: owning tree; provides max_depth, n_classes and input_layer.
    """
    self.id = id
    self.depth = depth
    self.pathprob = pathprob
    self.isLeaf = depth == tree.max_depth
    self.leftChild = self.rightChild = None
    if self.isLeaf:
        # Leaf: one trainable row of n_classes values.
        self.phi = TrainableVar(name='phi_' + self.id,
                                shape=(1, tree.n_classes),
                                dtype='float32',
                                initializer=TruncatedNormal())(pathprob)
    else:
        # Inner node: scalar gate evaluated on the tree's input layer.
        self.dense = Dense(units=1,
                           name='dense_' + self.id,
                           dtype='float32',
                           kernel_initializer=RandomNormal(),
                           bias_initializer=TruncatedNormal())(tree.input_layer)
def make_discriminator(input_shape=(416, 416, 3), lr=0.0002, beta_1=0.5):
    """Build and compile a GAN discriminator.

    Four 4x4 conv stages with LeakyReLU(0.2) — the first keeps resolution,
    the rest downsample by 2 — followed by Flatten, Dropout(0.4) and a
    sigmoid real/fake unit. Compiled with Adam + binary crossentropy.

    Args:
        input_shape: shape of the input images.
        lr: Adam learning rate.
        beta_1: Adam beta_1 momentum term.

    Returns:
        the compiled keras.Sequential model.
    """
    init = TruncatedNormal(mean=0.0, stddev=0.02)
    model = keras.Sequential()
    # (filters, strides) per conv stage.
    conv_specs = [(48, (1, 1)), (128, (2, 2)), (256, (2, 2)), (256, (2, 2))]
    for stage, (n_filters, strides) in enumerate(conv_specs):
        conv_kwargs = dict(filters=n_filters,
                           kernel_size=(4, 4),
                           kernel_initializer=init,
                           strides=strides,
                           padding='same')
        if stage == 0:
            conv_kwargs['input_shape'] = input_shape
        model.add(Conv2D(**conv_kwargs))
        model.add(LeakyReLU(alpha=0.2))
    # Classifier head.
    model.add(Flatten())
    model.add(Dropout(rate=0.4))
    model.add(Dense(1, activation='sigmoid'))
    # Compile model.
    optimizer = keras.optimizers.Adam(lr=lr, beta_1=beta_1)
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    return model
def block(x):
    """Up-convolution block: a transposed conv that upsamples by
    `pool_size` and halves the filter count, followed by ReLU.

    Relies on `layer_idx`, `filters_root`, `kernel_size` and `pool_size`
    captured from the enclosing scope.
    """
    filters = 2**(layer_idx + 1) * filters_root
    # He-style stddev: sqrt(2 / fan_in) with fan_in = kernel_size^2 * filters.
    stddev = np.sqrt(2 / (kernel_size**2 * filters))
    x = layers.Conv2DTranspose(
        filters // 2,
        kernel_size=(pool_size, pool_size),
        kernel_initializer=TruncatedNormal(stddev=stddev),
        strides=pool_size,
        padding="valid")(x)
    x = layers.Activation("relu")(x)
    return x