def cnn(self, name_scope, char_embedded):
    """Two-layer char-level CNN encoder with max-over-time pooling."""
    # Add a channel dimension: (batch, char_max_len, char_embedding_dim, 1)
    char_embedded = tf.expand_dims(char_embedded, -1)
    pooled_outputs = list()
    for i, filter_size in enumerate(self.config.filter_sizes):
        with tf.variable_scope(f"{name_scope}_conv1_{filter_size}"):
            # First convolution spans the full embedding dimension.
            filter_shape = [filter_size, self.config.char_embedding_dim, 1,
                            self.config.n_filter]
            w_filter = weight_variable(shape=filter_shape, name='w_filter')
            beta = bias_variable(shape=[self.config.n_filter],
                                 name='beta_filter')
            conv = tf.nn.bias_add(
                tf.nn.conv2d(char_embedded, w_filter, strides=[1, 1, 1, 1],
                             padding="VALID", name="conv"), beta)
            h = tf.nn.relu(conv, name="relu")
        with tf.variable_scope(f"{name_scope}_conv2_{filter_size}"):
            # Second convolution over the n_filter feature maps.
            filter_shape = [filter_size, 1, self.config.n_filter,
                            self.config.n_filter]
            w_filter = weight_variable(shape=filter_shape, name='w_filter')
            beta = bias_variable(shape=[self.config.n_filter],
                                 name='beta_filter')
            conv = tf.nn.bias_add(
                tf.nn.conv2d(h, w_filter, strides=[1, 1, 1, 1],
                             padding="VALID", name="conv"), beta)
            h = tf.nn.relu(conv, name="relu")
        # After two VALID convolutions the time axis shrinks to
        # char_max_len - 2 * filter_size + 2; pool over all of it.
        pooled = tf.nn.max_pool(
            h,
            ksize=[1, self.config.char_max_len - filter_size * 2 + 2, 1, 1],
            strides=[1, 1, 1, 1],
            padding='VALID',
            name="pool")
        pooled_outputs.append(pooled)
    # Concatenate along the channel axis, then flatten per word position.
    h_pool = tf.concat(pooled_outputs, 3)
    cnn_char_enc = tf.reshape(
        h_pool,
        [self.config.batch_size, -1,
         self.config.n_filter * len(self.config.filter_sizes)])
    return cnn_char_enc
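# The weight_variable / bias_variable helpers are defined elsewhere in the
# repo. A minimal sketch of what the code above assumes (Xavier-initialized
# weights, constant-initialized biases) might look like this; the actual
# definitions may differ, e.g. truncated-normal initialization.
def weight_variable(shape, name):
    # Xavier/Glorot init, matching the embedding initializer used elsewhere.
    return tf.get_variable(
        name, shape=shape,
        initializer=tf.contrib.layers.xavier_initializer())


def bias_variable(shape, name):
    # Small positive constant to avoid dead ReLUs at the start of training.
    return tf.get_variable(
        name, shape=shape, initializer=tf.constant_initializer(0.1))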
def __init__(self):
    self.model_name = 'bigru'
    self.settings = BiGRUSetting()
    self.max_f1 = 0.0
    self.is_training = True
    with tf.name_scope('Inputs'):
        self.title_input = tf.placeholder(
            tf.int64, [None, self.settings.title_len], name='title_inputs')
        self.detail_input = tf.placeholder(
            tf.int64, [None, self.settings.detail_len], name='detail_inputs')
        self.class_input = tf.placeholder(
            tf.float32, [None, self.settings.class_num], name='class_input')
        self.title_length = tf.placeholder(tf.int64, [None],
                                           name='title_length')
        self.detail_length = tf.placeholder(tf.int64, [None],
                                            name='detail_length')
        self.keep_prob = tf.placeholder(tf.float32, [])

    """ Build the embedding layer """
    with tf.variable_scope('embedding'):
        self.embedding = tf.get_variable(
            name='embedding',
            shape=[self.settings.voc_size, self.settings.embedding_dim],
            initializer=tf.contrib.layers.xavier_initializer())

    """ Build the stacked bi-GRU + attention layers """
    with tf.variable_scope('bi_gru_title'):
        title_embedded = tf.nn.embedding_lookup(self.embedding,
                                                self.title_input)
        title_bi_gru_output = self.stack_bi_gru_layer(title_embedded,
                                                      self.title_length)
        # Forward and backward states are concatenated, hence the * 2.
        title_attention_output = attention_layer(
            title_bi_gru_output, self.settings.bi_gru_hidden_dim * 2)
    with tf.variable_scope('bi_gru_detail'):
        detail_embedded = tf.nn.embedding_lookup(self.embedding,
                                                 self.detail_input)
        detail_bi_gru_output = self.stack_bi_gru_layer(detail_embedded,
                                                       self.detail_length)
        detail_attention_output = attention_layer(
            detail_bi_gru_output, self.settings.bi_gru_hidden_dim * 2)

    """ Build the fully connected layer """
    with tf.variable_scope('fc'):
        concat_output = tf.concat(
            [title_attention_output, detail_attention_output], axis=1)
        W_fc = weight_variable(
            [self.settings.bi_gru_hidden_dim * 4,
             self.settings.fc_hidden_dim],
            name='Weight_fc')
        fc_output = tf.matmul(concat_output, W_fc, name='h_fc')
        fc_bn_relu = tf.nn.relu(fc_output, name="relu")

    """ Build the output layer """
    with tf.variable_scope('output'):
        W_out = weight_variable(
            [self.settings.fc_hidden_dim, self.settings.class_num],
            name='Weight_out')
        b_out = bias_variable([self.settings.class_num], name='bias_out')
        self.y_pred = tf.nn.xw_plus_b(fc_bn_relu, W_out, b_out,
                                      name='y_pred')
        self.sigmoid_y_pred = tf.nn.sigmoid(self.y_pred)

    """ Loss """
    with tf.variable_scope('loss'):
        self.loss = add_loss(self.y_pred, self.class_input)

    """ Train """
    with tf.variable_scope('training_ops'):
        self.train_op = add_train_op(lr=self.settings.lr, loss=self.loss)

    self.saver = tf.train.Saver(max_to_keep=1, name=self.model_name)
    print(f'{self.model_name} init finish')
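# The attention_layer helper is defined elsewhere in the repo. Given its
# (inputs, attention_dim) signature above, a plausible sketch is standard
# additive (Bahdanau-style) attention over the BiGRU outputs; all variable
# names below are assumptions, and the real implementation may differ.
def attention_layer(inputs, attention_dim):
    # inputs: (batch, time, attention_dim); attention_dim is
    # bi_gru_hidden_dim * 2 because both GRU directions are concatenated.
    w = tf.get_variable('att_w', shape=[attention_dim, attention_dim],
                        initializer=tf.contrib.layers.xavier_initializer())
    b = tf.get_variable('att_b', shape=[attention_dim],
                        initializer=tf.constant_initializer(0.1))
    u = tf.get_variable('att_u', shape=[attention_dim],
                        initializer=tf.contrib.layers.xavier_initializer())
    v = tf.tanh(tf.tensordot(inputs, w, axes=1) + b)  # (batch, time, dim)
    scores = tf.tensordot(v, u, axes=1)               # (batch, time)
    alphas = tf.nn.softmax(scores)                    # attention weights
    # Weighted sum over time -> (batch, attention_dim)
    return tf.reduce_sum(inputs * tf.expand_dims(alphas, -1), axis=1)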
def __init__(self):
    super().__init__('rcnn')
    self.settings = RCNNSetting()
    self.n_filter_total = self.settings.n_filter * len(
        self.settings.filter_sizes)
    with tf.name_scope('Inputs'):
        self.title_input = tf.placeholder(
            tf.int64, [None, self.settings.title_len], name='title_inputs')
        self.detail_input = tf.placeholder(
            tf.int64, [None, self.settings.detail_len], name='detail_inputs')
        self.class_input = tf.placeholder(
            tf.float32, [None, self.settings.class_num], name='class_input')
        self.title_length = tf.placeholder(tf.int64, [None],
                                           name='title_length')
        self.detail_length = tf.placeholder(tf.int64, [None],
                                            name='detail_length')
        self.keep_prob = tf.placeholder(tf.float32, [])

    """ Build the embedding layer """
    with tf.variable_scope('embedding'):
        self.embedding = tf.get_variable(
            name='embedding',
            shape=[self.settings.voc_size, self.settings.embedding_dim],
            initializer=tf.contrib.layers.xavier_initializer())

    """ Build the RCNN layers """
    with tf.variable_scope('rcnn_text'):
        output_title = self.rcnn_layer(self.title_input,
                                       self.settings.title_len,
                                       self.title_length)
    with tf.variable_scope('rcnn_content'):
        output_content = self.rcnn_layer(self.detail_input,
                                         self.settings.detail_len,
                                         self.detail_length)
    concat_output = tf.concat([output_title, output_content], axis=1)

    """ Build the fully connected layer """
    with tf.variable_scope('fc_bn'):
        W_fc = weight_variable(
            [self.n_filter_total * 2, self.settings.fc_hidden_dim],
            name='Weight_fc')
        fc_output = tf.matmul(concat_output, W_fc, name='h_fc')
        fc_bn_relu = tf.nn.relu(fc_output, name="relu")
        fc_bn_drop = tf.nn.dropout(fc_bn_relu, self.keep_prob)

    """ Build the output layer """
    with tf.variable_scope('output'):
        W_out = weight_variable(
            [self.settings.fc_hidden_dim, self.settings.class_num],
            name='Weight_out')
        b_out = bias_variable([self.settings.class_num], name='bias_out')
        self.y_pred = tf.nn.xw_plus_b(fc_bn_drop, W_out, b_out,
                                      name='y_pred')
        self.sigmoid_y_pred = tf.nn.sigmoid(self.y_pred)

    """ Loss """
    with tf.variable_scope('loss'):
        # Multi-label objective: independent sigmoid per class.
        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=self.y_pred, labels=self.class_input))

    """ Train """
    with tf.variable_scope('training_ops'):
        self.train_op = add_train_op(lr=self.settings.lr,
                                     loss=self.loss,
                                     global_step=self.global_step)

    self.saver = tf.train.Saver(max_to_keep=1, name=self.model_name)
    print(f'{self.model_name} init finish')
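# The add_loss / add_train_op helpers called by the two constructors above
# live elsewhere in the repo. A minimal sketch consistent with both call
# sites (with and without global_step), assuming sigmoid cross-entropy and
# a plain Adam step:
def add_loss(logits, labels):
    return tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=logits,
                                                labels=labels))


def add_train_op(lr, loss, global_step=None):
    optimizer = tf.train.AdamOptimizer(learning_rate=lr)
    return optimizer.minimize(loss, global_step=global_step)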
def cnn_layer(self, X_inputs, n_step):
    """TextCNN layer.

    Args:
        X_inputs: tensor, shape=(batch_size, n_step)
    Returns:
        tensor, shape=(batch_size, self.n_filter_total)
    """
    inputs = tf.nn.embedding_lookup(self.embedding, X_inputs)
    # Add a channel dimension for conv2d: (batch, n_step, embedding_dim, 1)
    inputs = tf.expand_dims(inputs, -1)
    pooled_outputs = list()
    for i, filter_size in enumerate(self.settings.filter_sizes):
        with tf.variable_scope("conv1%s" % filter_size):
            # Convolution layer
            filter_shape = [filter_size, self.settings.embedding_dim, 1,
                            self.settings.n_filter]
            W_filter = weight_variable(shape=filter_shape, name='W_filter')
            beta = bias_variable(shape=[self.settings.n_filter],
                                 name='beta_filter')
            # tf.summary.histogram('beta', beta)
            conv = tf.nn.conv2d(inputs, W_filter, strides=[1, 1, 1, 1],
                                padding="VALID", name="conv")
            # Batch norm before the activation (currently disabled). With
            # BN enabled, its offset `beta` replaces the bias, and scaling
            # is not useful in front of ReLUs:
            # conv_bn, update_ema = self.batchnorm(conv, beta,
            #                                      convolutional=True)
            h = tf.nn.relu(conv, name="relu")
        with tf.variable_scope("conv2%s" % filter_size):
            filter_shape = [filter_size, 1, self.settings.n_filter,
                            self.settings.n_filter]
            W_filter = weight_variable(shape=filter_shape, name='W_filter')
            beta = bias_variable(shape=[self.settings.n_filter],
                                 name='beta_filter')
            # tf.summary.histogram('beta', beta)
            conv = tf.nn.conv2d(h, W_filter, strides=[1, 1, 1, 1],
                                padding="VALID", name="conv")
            # conv_bn, update_ema = self.batch_norm(conv, beta,
            #                                       convolutional=True)
            # h = tf.nn.relu(conv_bn, name="relu")
            h = tf.nn.relu(conv, name="relu")
        # Max-pooling over the remaining time steps.
        pooled = tf.nn.max_pool(
            h,
            ksize=[1, n_step - filter_size * 2 + 2, 1, 1],
            strides=[1, 1, 1, 1],
            padding='VALID',
            name="pool")
        pooled_outputs.append(pooled)
        # self.update_emas.append(update_ema)
    h_pool = tf.concat(pooled_outputs, 3)
    h_pool_flat = tf.reshape(h_pool, [-1, self.n_filter_total])
    return h_pool_flat  # shape = [batch_size, self.n_filter_total]
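# Shape check for the pooling window used above: a VALID convolution with
# filter width f shrinks a length-n axis to n - f + 1, so two stacked
# convolutions leave n_step - 2 * (f - 1) = n_step - 2 * f + 2 positions,
# which is exactly the ksize that pools over the whole remaining time axis.
def pooled_time_steps(n_step, filter_size):
    after_conv1 = n_step - filter_size + 1  # first VALID convolution
    after_conv2 = after_conv1 - filter_size + 1  # second VALID convolution
    return after_conv2


assert pooled_time_steps(30, 3) == 30 - 3 * 2 + 2  # both equal 26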
def __init__(self):
    self.model_name = 'transformer'
    self.settings = TransformerSetting()
    self.max_f1 = 0.0
    self.is_training = True
    with tf.name_scope('Inputs'):
        self.title_input = tf.placeholder(
            tf.int64, [None, self.settings.title_len], name='title_inputs')
        self.detail_input = tf.placeholder(
            tf.int64, [None, self.settings.detail_len], name='detail_inputs')
        self.class_input = tf.placeholder(
            tf.float32, [None, self.settings.class_num], name='class_input')
        self.keep_prob = tf.placeholder(tf.float32, [])

    """ =========== title encoder start =========== """
    """ Build the embedding layer """
    self.title_embedded, self.lookup_table = embedding(
        self.title_input,
        vocab_size=self.settings.voc_size,
        num_units=self.settings.embedding_dim,
        scale=True,
        scope="title_embedding")
    # Add learned positional embeddings.
    self.title_embedded += embedding(
        tf.tile(tf.expand_dims(tf.range(self.settings.title_len), 0),
                [self.settings.batch_size, 1]),
        vocab_size=self.settings.title_len,
        num_units=self.settings.embedding_dim,
        zero_pad=False,
        scale=False,
        scope="title_position_embedding")[0]

    """ Dropout """
    # keep_prob is a keep probability; tf.layers.dropout expects the
    # fraction to drop, hence 1.0 - keep_prob.
    self.title_embedded = tf.layers.dropout(
        self.title_embedded,
        rate=1.0 - self.keep_prob,
        training=tf.convert_to_tensor(self.is_training))

    # Blocks
    for i in range(self.settings.num_blocks):
        with tf.variable_scope("title_num_blocks_{}".format(i)):
            # Multi-head self-attention
            self.title_embedded = multihead_attention(
                queries=self.title_embedded,
                keys=self.title_embedded,
                num_units=self.settings.hidden_dim,
                num_heads=self.settings.num_heads,
                dropout_rate=1.0 - self.keep_prob,
                is_training=self.is_training,
                causality=False)
            # Position-wise feed-forward
            self.title_embedded = feedforward(
                self.title_embedded,
                num_units=[4 * self.settings.hidden_dim,
                           self.settings.hidden_dim])

    """ Sum over time steps """
    self.title_encoder = tf.reduce_sum(self.title_embedded, axis=1)
    """ =========== title encoder end =========== """

    """ =========== description encoder start =========== """
    """ Build the embedding layer (lookup table shared with the title) """
    self.description_embedded = tf.nn.embedding_lookup(
        self.lookup_table,
        self.detail_input) * (self.settings.embedding_dim ** 0.5)
    self.description_embedded += embedding(
        tf.tile(tf.expand_dims(tf.range(self.settings.detail_len), 0),
                [self.settings.batch_size, 1]),
        vocab_size=self.settings.detail_len,
        num_units=self.settings.embedding_dim,
        zero_pad=False,
        scale=False,
        scope="description_position_embedding")[0]

    """ Dropout """
    self.description_embedded = tf.layers.dropout(
        self.description_embedded,
        rate=1.0 - self.keep_prob,
        training=tf.convert_to_tensor(self.is_training))

    # Blocks
    for i in range(self.settings.num_blocks):
        with tf.variable_scope("description_num_blocks_{}".format(i)):
            # Multi-head self-attention
            self.description_embedded = multihead_attention(
                queries=self.description_embedded,
                keys=self.description_embedded,
                num_units=self.settings.hidden_dim,
                num_heads=self.settings.num_heads,
                dropout_rate=1.0 - self.keep_prob,
                is_training=self.is_training,
                causality=False)
            # Position-wise feed-forward
            self.description_embedded = feedforward(
                self.description_embedded,
                num_units=[4 * self.settings.hidden_dim,
                           self.settings.hidden_dim])

    """ Sum over time steps """
    self.description_encoder = tf.reduce_sum(self.description_embedded,
                                             axis=1)
    """ =========== description encoder end =========== """

    """ Build the fully connected layer """
    with tf.variable_scope('fc'):
        concat_output = tf.concat(
            [self.title_encoder, self.description_encoder], axis=1)
        W_fc = weight_variable(
            [self.settings.hidden_dim * 2, self.settings.fc_hidden_dim],
            name='Weight_fc')
        fc_output = tf.matmul(concat_output, W_fc, name='h_fc')
        fc_bn_relu = tf.nn.relu(fc_output, name="relu")

    """ Build the output layer """
    with tf.variable_scope('output'):
        W_out = weight_variable(
            [self.settings.fc_hidden_dim, self.settings.class_num],
            name='Weight_out')
        b_out = bias_variable([self.settings.class_num], name='bias_out')
        self.y_pred = tf.nn.xw_plus_b(fc_bn_relu, W_out, b_out,
                                      name='y_pred')
        self.sigmoid_y_pred = tf.nn.sigmoid(self.y_pred)

    """ Loss """
    with tf.variable_scope('loss'):
        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=self.y_pred, labels=self.class_input))

    """ Train """
    with tf.variable_scope('training_ops'):
        self.global_step = tf.Variable(0, name='global_step',
                                       trainable=False)
        # Adam with beta2=0.98, as in the original Transformer setup.
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=self.settings.lr,
            beta1=0.9,
            beta2=0.98,
            epsilon=1e-8)
        self.train_op = self.optimizer.minimize(
            self.loss, global_step=self.global_step)

    self.saver = tf.train.Saver(max_to_keep=1, name=self.model_name)
    print(f'{self.model_name} init finish')
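# Hypothetical usage sketch (not part of the repo): feed one batch through
# the graph built above and take a single optimization step. All argument
# names below are illustrative assumptions.
def train_step(sess, model, batch_titles, batch_details, batch_labels,
               keep_prob=0.5):
    """Run one optimization step; returns (global_step, loss)."""
    feed = {
        model.title_input: batch_titles,    # (batch, title_len) int64 ids
        model.detail_input: batch_details,  # (batch, detail_len) int64 ids
        model.class_input: batch_labels,    # (batch, class_num) multi-hot
        model.keep_prob: keep_prob,         # keep probability for dropout
    }
    _, step, loss = sess.run(
        [model.train_op, model.global_step, model.loss], feed_dict=feed)
    return step, loss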