def build_graph(config):
    input_x = tf.placeholder(tf.int32, [None, None], name='input_x')
    input_y = tf.placeholder(tf.int64, [None], name='input_y')

    with tf.device('/cpu:0'):
        emb_mat = tf.get_variable('embedding',
                                  [config.vocab.size(), config.vocab.emb_dim],
                                  initializer=tf.constant_initializer(config.vocab.embeddings),
                                  trainable=config.emb_tune)
        seq_emb = tf.nn.embedding_lookup(emb_mat, input_x)

        seq_mask = tf.cast(tf.cast(input_x, dtype=tf.bool), dtype=tf.int32)
        seq_len = tf.reduce_sum(seq_mask, 1)

    with tf.name_scope("rnn"):
        seq_e = rnn_layer(seq_emb, seq_len, 128, config.keep_prob,
                          activation=tf.nn.relu, concat=True, scope='bi-lstm-1')

        B = tf.shape(seq_e)[0]
        query = tf.get_variable("query", [config.att_dim],
                                initializer=tf.ones_initializer())
        query = tf.tile(tf.expand_dims(query, 0), [B, 1])

        feat = att_pool_layer(seq_e, query, seq_mask, config.att_dim,
                              config.keep_prob, is_train=None, scope="att_pooling")
        # feat = seq_e[:, -1, :]

    with tf.name_scope("score"):
        #
        fc = tf.contrib.layers.dropout(feat, config.keep_prob)
        fc = tf.layers.dense(fc, 128, name='fc1')
        fc = tf.nn.relu(fc)

        fc = tf.contrib.layers.dropout(fc, config.keep_prob)
        logits = tf.layers.dense(fc, config.num_classes, name='fc2')
        # logits = tf.nn.sigmoid(fc)

        normed_logits = tf.nn.softmax(logits, name='logits')
        y_pred_cls = tf.argmax(logits, 1, name='pred_cls')

    with tf.name_scope("loss"):
        #
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=input_y)
        loss = tf.reduce_mean(cross_entropy, name='loss')

    with tf.name_scope("accuracy"):
        #
        correct_pred = tf.equal(input_y, y_pred_cls)
        acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name='metric')

    #
    print(normed_logits)
    print(acc)
    print(loss)
    print()
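
# ---------------------------------------------------------------------------
# The graph above relies on two helpers defined elsewhere in the repo,
# rnn_layer() and att_pool_layer(). As a reading aid, below is a minimal
# sketch of what a masked attention-pooling layer with the call signature
# used here (seq, query, mask, att_dim, keep_prob, is_train, scope) could
# look like. It is an assumption for illustration, not the repo's actual
# implementation; TF 1.x is assumed throughout this file.
import tensorflow as tf  # TF 1.x, as assumed by the rest of this file

def att_pool_layer_sketch(seq, query, seq_mask, att_dim, keep_prob,
                          is_train=None, scope="att_pooling"):
    """Masked additive-attention pooling: [B, T, D] -> [B, D]."""
    with tf.variable_scope(scope):
        # project the sequence and the query into a shared attention space
        seq_proj = tf.layers.dense(seq, att_dim, name='seq_proj')            # [B, T, A]
        query_proj = tf.expand_dims(
            tf.layers.dense(query, att_dim, name='query_proj'), 1)           # [B, 1, A]
        scores = tf.layers.dense(tf.nn.tanh(seq_proj + query_proj), 1,
                                 name='score')                               # [B, T, 1]
        # mask out padding positions before the softmax
        mask = tf.expand_dims(tf.cast(seq_mask, tf.float32), -1)             # [B, T, 1]
        scores = scores + (1.0 - mask) * (-1e30)
        alphas = tf.nn.softmax(scores, axis=1)
        # attention-weighted sum over the time axis
        feat = tf.reduce_sum(alphas * seq, axis=1)                           # [B, D]
        return tf.nn.dropout(feat, keep_prob)
# Note: other builders in this file pass the arguments as (query, seq, ...);
# the sketch follows the order used by build_graph() above.
# ---------------------------------------------------------------------------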
def build_inference(self, input_tensors): """ """ settings = self.settings input_x = input_tensors["input_x"] # keep_prob = tf.get_variable("keep_prob", shape=[], dtype=tf.float32, trainable=False) # with tf.device('/cpu:0'): emb_mat = tf.get_variable( 'embedding', [settings.vocab.size(), settings.vocab.emb_dim], initializer=tf.constant_initializer(settings.vocab.embeddings), trainable=settings.emb_tune, dtype=tf.float32) seq_emb = tf.nn.embedding_lookup(emb_mat, input_x) seq_mask = tf.cast(tf.cast(input_x, dtype=tf.bool), dtype=tf.int32) seq_len = tf.reduce_sum(seq_mask, 1) with tf.name_scope("rnn"): seq_e = rnn_layer(seq_emb, seq_len, 128, keep_prob, activation=tf.nn.relu, concat=True, scope='bi-lstm-1') # attention-pooling, 注意力加权采提 # B = tf.shape(seq_e)[0] query = tf.get_variable("query", [settings.att_dim], initializer=tf.ones_initializer()) query = tf.tile(tf.expand_dims(query, 0), [B, 1]) feat = att_pool_layer(query, seq_e, seq_mask, settings.att_dim, keep_prob, scope="att_pooling") #feat = seq_e[:,-1,:] with tf.name_scope("score"): # fc = tf.nn.dropout(feat, keep_prob) fc = tf.layers.dense(fc, 128, name='fc1') fc = tf.nn.relu(fc) fc = tf.nn.dropout(fc, keep_prob) logits = tf.layers.dense(fc, settings.num_classes, name='fc2') normed_logits = tf.nn.softmax(logits, name='logits') # print(normed_logits) # output_tensors = {"normed_logits": normed_logits, "logits": logits} # return output_tensors
def build_graph(config):
    input_x = tf.placeholder(tf.int32, [None, None], name='input_x')
    input_y = tf.placeholder(tf.int64, [None], name='input_y')

    with tf.device('/cpu:0'):
        emb_mat = tf.get_variable(
            'embedding', [config.vocab.size(), config.vocab.emb_dim],
            initializer=tf.constant_initializer(config.vocab.embeddings),
            trainable=config.emb_tune)
        seq_emb = tf.nn.embedding_lookup(emb_mat, input_x)

        seq_mask = tf.cast(tf.cast(input_x, dtype=tf.bool), dtype=tf.int32)
        # seq_len = tf.reduce_sum(seq_mask, 1)

    with tf.name_scope("csm"):
        conv1_5 = tf.layers.conv1d(seq_emb, 128, 5, padding='same', name='conv1_5')
        conv1_3 = tf.layers.conv1d(seq_emb, 128, 3, padding='same', name='conv1_3')
        conv1_2 = tf.layers.conv1d(seq_emb, 128, 2, padding='same', name='conv1_2')

        emb_d = tf.concat([conv1_5, conv1_3, conv1_2, seq_emb], -1)
        emb_d = tf.layers.dense(emb_d, 256, name='emb_d')

        B = tf.shape(emb_d)[0]
        num_heads = 2
        att_dim = 128

        feat = []
        for idx in range(num_heads):
            trans = dot_att_layer(emb_d, emb_d, seq_mask, 256,
                                  keep_prob=config.keep_prob, gating=False,
                                  scope="dot_attention_" + str(idx))
            query = tf.get_variable("query_" + str(idx), [att_dim],
                                    initializer=tf.ones_initializer())
            query = tf.tile(tf.expand_dims(query, 0), [B, 1])
            feat_c = att_pool_layer(trans, query, seq_mask, att_dim,
                                    config.keep_prob, is_train=None,
                                    scope="att_pooling_" + str(idx))
            feat.append(feat_c)
        #
        feat = tf.concat(feat, 1)
        #
    with tf.name_scope("score"):
        #
        fc = tf.contrib.layers.dropout(feat, config.keep_prob)
        fc = tf.layers.dense(fc, 128, name='fc1')
        fc = tf.nn.relu(fc)

        fc = tf.contrib.layers.dropout(fc, config.keep_prob)
        logits = tf.layers.dense(fc, config.num_classes, name='fc2')
        # logits = tf.nn.sigmoid(logits)

        normed_logits = tf.nn.softmax(logits, name='logits')
        y_pred_cls = tf.argmax(logits, 1, name='pred_cls')

    with tf.name_scope("loss"):
        #
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=input_y)
        loss = tf.reduce_mean(cross_entropy, name='loss')

    with tf.name_scope("accuracy"):
        #
        correct_pred = tf.equal(input_y, y_pred_cls)
        acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name='metric')

    #
    print(normed_logits)
    print(acc)
    print(loss)
    print()
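
# ---------------------------------------------------------------------------
# dot_att_layer() above is a repo helper (R-Net-style dot attention over a
# memory sequence). A rough sketch consistent with the call
# dot_att_layer(inputs, memory, mask, hidden, keep_prob=..., gating=...,
# scope=...) follows; the scaling, gating, and projection details here are
# assumptions for illustration, not the repo's code.
def dot_att_layer_sketch(inputs, memory, mask, hidden, keep_prob=1.0,
                         gating=False, scope="dot_attention"):
    """Dot-product attention of inputs over memory, masked on memory positions."""
    with tf.variable_scope(scope):
        d_inputs = tf.nn.dropout(inputs, keep_prob)
        d_memory = tf.nn.dropout(memory, keep_prob)
        inputs_ = tf.layers.dense(d_inputs, hidden, activation=tf.nn.relu, name='inputs')
        memory_ = tf.layers.dense(d_memory, hidden, activation=tf.nn.relu, name='memory')
        # [B, T_q, T_m] scaled similarity scores
        logits = tf.matmul(inputs_, memory_, transpose_b=True) / (hidden ** 0.5)
        # mask out padded memory positions
        mask_m = tf.expand_dims(tf.cast(mask, tf.float32), 1)        # [B, 1, T_m]
        logits = logits + (1.0 - mask_m) * (-1e30)
        alphas = tf.nn.softmax(logits, axis=-1)
        outputs = tf.matmul(alphas, memory)                           # [B, T_q, D_m]
        res = tf.concat([inputs, outputs], axis=-1)
        if gating:
            # optional sigmoid gate over the concatenated features
            gate = tf.layers.dense(res, res.shape.as_list()[-1],
                                   activation=tf.nn.sigmoid, name='gate')
            res = gate * res
        return res
# ---------------------------------------------------------------------------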
def build_inference(settings, input_tensors):
    input_x = input_tensors[0]
    #
    keep_prob = tf.get_variable("keep_prob", shape=[], dtype=tf.float32,
                                trainable=False)
    #
    with tf.device('/cpu:0'):
        emb_mat = tf.get_variable(
            'embedding', [settings.vocab.size(), settings.vocab.emb_dim],
            initializer=tf.constant_initializer(settings.vocab.embeddings),
            trainable=settings.emb_tune)

    with tf.variable_scope("emb"):
        emb_dim = settings.vocab.emb_dim
        emb_x = tf.nn.embedding_lookup(emb_mat, input_x)

        mask_t = tf.cast(tf.cast(input_x, dtype=tf.bool), dtype=tf.int32)
        # seq_len = tf.reduce_sum(mask_t, 1)

    with tf.variable_scope("posi_emb"):
        d_posi_emb = 64
        d_model = 1024
        posi_emb_x = get_posi_emb(input_x, d_posi_emb, d_model)

        emb_x = tf.concat([emb_x, posi_emb_x], -1)
        emb_all_dim = emb_dim + d_posi_emb * 2

    #
    enc_t = emb_x
    enc_dim = emb_all_dim
    #
    # transformers
    #
    num_layers_trans = 2
    #
    for lid in range(num_layers_trans):
        with tf.variable_scope("self_att_%d" % lid):
            num_head = 2
            num_hidden = int(128 / num_head)
            sat_t = []
            for idx in range(num_head):
                sat_t_c = att_qkv_layer(enc_t, enc_t, enc_t, mask_t, num_hidden,
                                        keep_prob=keep_prob, scope="t_%d" % idx)
                sat_t.append(sat_t_c)
            #
            sat_t = tf.concat(sat_t, -1)
            #
            # add & norm
            sat_t = dropout(sat_t, keep_prob=keep_prob)
            sat_t = tf.layers.dense(sat_t, enc_dim)
            #
            enc_t = enc_t + sat_t
            enc_t = tf.contrib.layers.layer_norm(enc_t)
            #
            # """
            # dense
            ffn_t = dropout(enc_t, keep_prob=keep_prob)
            ffn_t = tf.layers.dense(ffn_t, enc_dim, activation=tf.nn.relu)
            ffn_t = tf.layers.dense(ffn_t, enc_dim)
            #
            # add & norm
            enc_t = enc_t + ffn_t
            enc_t = tf.contrib.layers.layer_norm(enc_t)
            #
            # """

    with tf.variable_scope("feat"):
        """ attention-pooling, attention-weighted feature extraction """
        B = tf.shape(enc_t)[0]
        query = tf.get_variable("query", [settings.att_dim],
                                initializer=tf.ones_initializer())
        query = tf.tile(tf.expand_dims(query, 0), [B, 1])

        feat = att_pool_layer(query, enc_t, mask_t, settings.att_dim,
                              keep_prob, scope="att_pooling")

    with tf.variable_scope("score"):
        #
        fc = tf.nn.dropout(feat, keep_prob)
        fc = tf.layers.dense(fc, 128, name='fc1')
        fc = tf.nn.relu(fc)

        fc = tf.nn.dropout(fc, keep_prob)
        logits = tf.layers.dense(fc, settings.num_classes, name='fc2')

        normed_logits = tf.nn.softmax(logits, name='logits')
        # print(normed_logits)

    #
    output_tensors = normed_logits, logits
    #
    return output_tensors
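
# ---------------------------------------------------------------------------
# get_posi_emb() above is assumed to produce sinusoidal position features of
# size 2 * d_posi_emb (a sin half and a cos half), which is what makes
# emb_all_dim = emb_dim + d_posi_emb * 2 consistent. A minimal sketch under
# that assumption (d_model taken as the frequency base); the repo's actual
# helper may differ.
def get_posi_emb_sketch(input_x, d_posi_emb, d_model):
    """Sinusoidal position features [B, T, 2*d_posi_emb] for int-id input_x."""
    T = tf.shape(input_x)[1]
    pos = tf.cast(tf.range(T), tf.float32)                               # [T]
    i = tf.cast(tf.range(d_posi_emb), tf.float32)                        # [d]
    # geometric progression of frequencies, as in the Transformer paper
    angle_rates = 1.0 / tf.pow(float(d_model), 2.0 * i / d_posi_emb)
    angles = tf.expand_dims(pos, 1) * tf.expand_dims(angle_rates, 0)     # [T, d]
    posi = tf.concat([tf.sin(angles), tf.cos(angles)], axis=-1)          # [T, 2d]
    # tile over the batch so it can be concatenated with the token embeddings
    B = tf.shape(input_x)[0]
    return tf.tile(tf.expand_dims(posi, 0), [B, 1, 1])                   # [B, T, 2d]
# ---------------------------------------------------------------------------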
def build_inference(self, input_tensors):
    """ Build the transformer-encoder + attention-pooling inference graph. """
    settings = self.settings
    input_x = input_tensors["input_x"]
    #
    keep_prob = tf.get_variable("keep_prob", shape=[], dtype=tf.float32,
                                trainable=False)
    #
    with tf.device('/cpu:0'):
        emb_mat = tf.get_variable(
            'embedding', [settings.vocab.size(), settings.vocab.emb_dim],
            initializer=tf.constant_initializer(settings.vocab.embeddings),
            trainable=settings.emb_tune, dtype=tf.float32)
        emb_dim = settings.vocab.emb_dim

    with tf.variable_scope("mask"):
        mask_t = tf.cast(tf.cast(input_x, dtype=tf.bool), dtype=tf.int32)
        # seq_len = tf.reduce_sum(mask_t, 1)
        mask_mat = get_tensor_expanded(mask_t, 1, tf.float32)

    with tf.variable_scope("emb"):
        posi_emb_max_len = 512
        posi_emb_dim = emb_dim
        posi_emb_model = 1024
        posi_emb_mat = get_position_emb_mat(posi_emb_max_len, posi_emb_dim,
                                            posi_emb_model)
        #
        emb_x = get_emb_positioned(input_x, emb_mat, posi_emb_mat)
        emb_all_dim = emb_dim

    #
    seq_input = emb_x
    dim_all = emb_all_dim
    #
    #
    # transformers
    #
    num_layers = 2
    num_heads = 2
    #
    dim_middle = emb_dim * 2
    activation_type = "gelu"
    #
    with tf.variable_scope("transformers"):
        seq_input = transformer_encoder(seq_input, mask_mat, num_layers,
                                        num_heads, dim_all, dim_middle,
                                        activation_type, keep_prob)
    #
    with tf.variable_scope("feat"):
        """ attention-pooling, attention-weighted feature extraction """
        B = tf.shape(seq_input)[0]
        query = tf.get_variable("query", [settings.att_dim],
                                initializer=tf.ones_initializer())
        query = tf.tile(tf.expand_dims(query, 0), [B, 1])

        feat = att_pool_layer(query, seq_input, mask_t, settings.att_dim,
                              keep_prob, scope="att_pooling")

    with tf.variable_scope("score"):
        #
        fc = tf.nn.dropout(feat, keep_prob)
        fc = tf.layers.dense(fc, 128, name='fc1')
        fc = tf.nn.relu(fc)

        fc = tf.nn.dropout(fc, keep_prob)
        logits = tf.layers.dense(fc, settings.num_classes, name='fc2')

        normed_logits = tf.nn.softmax(logits, name='logits')
        # print(normed_logits)

    #
    output_tensors = {"normed_logits": normed_logits, "logits": logits}
    #
    return output_tensors
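
# ---------------------------------------------------------------------------
# get_tensor_expanded(mask_t, 1, tf.float32) above is assumed to turn the
# [B, T] token mask into a broadcastable attention mask (e.g. [B, 1, T]) and
# cast it, so transformer_encoder() can apply it across query positions and
# heads. A minimal sketch under that assumption; the repo helper may do more.
def get_tensor_expanded_sketch(tensor, axis, dtype=None):
    """Insert a singleton axis and optionally cast, e.g. [B, T] -> [B, 1, T]."""
    expanded = tf.expand_dims(tensor, axis)
    if dtype is not None:
        expanded = tf.cast(expanded, dtype)
    return expanded
# ---------------------------------------------------------------------------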