def build_rnn(rnn_config, dropout=None):
    """Build a recurrent cell from a config dict.

    :param rnn_config: dict with keys 'type' ('gru' or 'lstm'),
        'dim' (hidden size) and 'use_dropout' (bool).
    :param dropout: dropout keep-probability (tensor or float); forwarded
        to the cell only when rnn_config['use_dropout'] is truthy.
    :return: the cell built by rnn_cell.build_gru / rnn_cell.build_lstm.
    :raises ValueError: if rnn_config['type'] is neither 'gru' nor 'lstm'.
    """
    _dropout_keep_prob = dropout if rnn_config['use_dropout'] else None
    cell_type = rnn_config['type']
    if cell_type == 'gru':
        return rnn_cell.build_gru(rnn_config['dim'],
                                  dropout_keep_prob=_dropout_keep_prob)
    if cell_type == 'lstm':
        return rnn_cell.build_lstm(rnn_config['dim'],
                                   dropout_keep_prob=_dropout_keep_prob)
    # Name the offending field explicitly instead of dumping the raw dict.
    raise ValueError('unknown rnn type {!r} in config: {}'.format(
        cell_type, rnn_config))
def build_neural_network(self, lookup_table):
    """Build the TF graph for a 3-turn sequence classifier.

    Each of the three turns gets its own token-id / sequence-length
    placeholder, is embedded through a shared lookup table, optionally
    perturbed (gaussian noise or dropout), and encoded by its own GRU
    (optionally followed by attention).  The three turn states are
    concatenated, passed through configurable dense layers, and trained
    with weighted softmax cross-entropy plus optional L2 terms.

    :param lookup_table: initial embedding matrix; rebound below as a
        tf.Variable (trainable per config).
    """
    # Scalar mode flag: switches noise/dropout between train and test behaviour.
    test_mode = tf.placeholder(tf.int8, None, name=TEST_MODE)
    label_gold = tf.placeholder(tf.int32, [None, ], name=LABEL_GOLD)
    sample_weights = tf.placeholder(tf.float32, [None, ], name=SAMPLE_WEIGHTS)
    # NOTE: rebinding the parameter — the raw matrix becomes a graph variable.
    lookup_table = tf.Variable(lookup_table,
                               dtype=tf.float32,
                               name=LOOKUP_TABLE,
                               trainable=self.config.embedding_trainable)
    dropout_keep_prob = build_dropout_keep_prob(
        keep_prob=self.config.dropout_keep_prob, test_mode=test_mode)

    # One slot per conversation turn (placeholders, lengths, embeddings).
    tid_ = [list() for _ in range(3)]
    seq_len_ = [list() for _ in range(3)]
    embedded_ = [list() for _ in range(3)]
    for i in range(3):
        tid_[i] = tf.placeholder(
            tf.int32, [self.config.batch_size, self.config.seq_len],
            name=TID_[i])
        seq_len_[i] = tf.placeholder(tf.int32, [None, ], name=SEQ_LEN_[i])
        embedded_[i] = tf.nn.embedding_lookup(lookup_table, tid_[i])

    # Optional embedding perturbation: none / gaussian noise / dropout.
    if self.config.embedding_noise_type is None:
        pass
    elif self.config.embedding_noise_type == 'gaussian':
        for i in range(3):
            embedded_[i] = add_gaussian_noise_layer(
                embedded_[i],
                stddev=self.config.embedding_noise_stddev,
                test_mode=test_mode)
    elif self.config.embedding_noise_type == 'dropout':
        emb_dropout_keep_prob = build_dropout_keep_prob(
            keep_prob=self.config.embedding_dropout_keep_prob,
            test_mode=test_mode)
        for i in range(3):
            embedded_[i] = tf.nn.dropout(embedded_[i], emb_dropout_keep_prob)
    else:
        raise Exception('unknown embedding noise type: {}'.format(
            self.config.embedding_noise_type))

    # Encode each turn with its own (non-shared) GRU; separate variable
    # scopes keep the three sets of RNN weights distinct.
    last_states = list()
    for i in range(3):
        with tf.variable_scope('turn{}'.format(i)):
            outputs, last_state = tf.nn.dynamic_rnn(
                rnn_cell.build_gru(self.config.rnn_dim,
                                   dropout_keep_prob=dropout_keep_prob),
                inputs=embedded_[i],
                sequence_length=seq_len_[i],
                dtype=tf.float32)
            if self.config.use_attention:
                # Attention pools the step outputs and replaces the last state.
                last_state, _ = attention.build(outputs,
                                                self.config.attention_dim)
            last_states.append(last_state)
    # Dropout on each turn representation before concatenation.
    last_states = list(
        map(
            lambda _state: tf.nn.dropout(_state,
                                         keep_prob=dropout_keep_prob),
            last_states))
    dense_input = tf.concat(last_states, axis=1, name=HIDDEN_FEAT)

    # Configurable dense stack; per-layer L2 terms are accumulated lazily.
    l2_component = None
    for conf in self.config.dense_layers:
        dense_input, w, _ = dense.build(dense_input,
                                        dim_output=conf['dim'],
                                        activation=getattr(
                                            tf.nn, conf['activation']))
        if conf.get('l2', 0.) > 0:
            comp = conf['l2'] * tf.nn.l2_loss(w)
            if l2_component is None:
                l2_component = comp
            else:
                l2_component += comp

    # Output layer: plain softmax logits, or a max-out head where each
    # class score is the max over its own group of units.
    l2_w_list = list()
    if self.config.max_out is None:
        y, w, b = dense.build(dense_input,
                              dim_output=self.config.output_dim,
                              output_name=PROB_PREDICT)
        l2_w_list.append(w)
    else:
        y_list = list()
        if len(self.config.max_out) != self.config.output_dim:
            raise ValueError('invalid max_out config')
        for dim in self.config.max_out:
            y, w, b = dense.build(dense_input, dim_output=dim)
            # Reduce the group to a single per-class score: [batch, 1].
            y = tf.expand_dims(tf.reduce_max(y, 1), axis=1)
            y_list.append(y)
            l2_w_list.append(w)
        y = tf.concat(y_list, axis=1, name=PROB_PREDICT)

    # compute loss
    _loss_1 = tf.reduce_mean(
        tf.losses.sparse_softmax_cross_entropy(logits=y,
                                               labels=label_gold,
                                               weights=sample_weights))
    _loss_2 = tf.constant(0., dtype=tf.float32)
    # Global L2 on the output weights plus any per-layer terms from above.
    if self.config.l2_reg_lambda is not None and self.config.l2_reg_lambda > 0:
        for w in l2_w_list:
            _loss_2 += self.config.l2_reg_lambda * tf.nn.l2_loss(w)
    if l2_component is not None:
        _loss_2 += l2_component
    loss = tf.add(_loss_1, _loss_2, name=LOSS)

    # predicted label (fetched by name; the returned tensor is not kept)
    tf.cast(tf.argmax(y, 1), tf.int32, name=LABEL_PREDICT)

    # common post-processing
    self.build_optimizer(loss=loss)
    self.set_graph(graph=tf.get_default_graph())
def build_neural_network(self, lookup_table):
    """Build the TF graph: three GRU turn encoders -> concat -> softmax.

    Behaviourally identical to the unrolled original: per-turn token-id and
    length placeholders (created in the same order), a shared embedding
    table with gaussian noise, one GRU per turn in variable scopes
    rnn_0 / rnn_1 / rnn_2, concatenated last states fed to a dense softmax
    layer, weighted cross-entropy loss with optional L2 regularisation.

    :param lookup_table: initial embedding matrix; rebound as a tf.Variable.
    """
    # Mode flag and supervision placeholders.
    test_mode = tf.placeholder(tf.int8, None, name=TEST_MODE)
    label_gold = tf.placeholder(tf.int32, [None, ], name=LABEL_GOLD)
    sample_weights = tf.placeholder(tf.float32, [None, ], name=SAMPLE_WEIGHTS)
    lookup_table = tf.Variable(lookup_table,
                               dtype=tf.float32,
                               name=LOOKUP_TABLE,
                               trainable=self.config.embedding_trainable)
    dropout_keep_prob = build_dropout_keep_prob(
        keep_prob=self.config.dropout_keep_prob, test_mode=test_mode)

    # Per-turn inputs, in the same creation order as the unrolled version:
    # tid_0, seq_len_0, tid_1, seq_len_1, tid_2, seq_len_2.
    tid_tensors = []
    seq_len_tensors = []
    for tid_name, len_name in ((TID_0, SEQ_LEN_0),
                               (TID_1, SEQ_LEN_1),
                               (TID_2, SEQ_LEN_2)):
        tid_tensors.append(
            tf.placeholder(tf.int32,
                           [self.config.batch_size, self.config.seq_len],
                           name=tid_name))
        seq_len_tensors.append(
            tf.placeholder(tf.int32, [None, ], name=len_name))

    # Embed each turn and add gaussian noise (active only outside test mode).
    noisy_embeddings = []
    for tid in tid_tensors:
        raw = tf.nn.embedding_lookup(lookup_table, tid)
        noisy_embeddings.append(
            add_gaussian_noise_layer(
                raw,
                stddev=self.config.embedding_noise_stddev,
                test_mode=test_mode))

    # One GRU per turn; distinct variable scopes keep weights unshared.
    final_states = []
    for turn_idx, (emb, lengths) in enumerate(
            zip(noisy_embeddings, seq_len_tensors)):
        with tf.variable_scope('rnn_{}'.format(turn_idx)):
            _, state = tf.nn.dynamic_rnn(
                rnn_cell.build_gru(self.config.rnn_dim,
                                   dropout_keep_prob=dropout_keep_prob),
                inputs=emb,
                sequence_length=lengths,
                dtype=tf.float32)
        final_states.append(state)

    dense_input = tf.concat(final_states, axis=1, name=HIDDEN_FEAT)
    y, w, b = dense.build(dense_input,
                          dim_output=self.config.output_dim,
                          output_name=PROB_PREDICT)

    # compute loss: weighted cross-entropy + optional L2 on output weights
    _loss_1 = tf.reduce_mean(
        tf.losses.sparse_softmax_cross_entropy(logits=y,
                                               labels=label_gold,
                                               weights=sample_weights))
    _loss_2 = tf.constant(0., dtype=tf.float32)
    if self.config.l2_reg_lambda is not None and self.config.l2_reg_lambda > 0:
        _loss_2 += self.config.l2_reg_lambda * tf.nn.l2_loss(w)
    loss = tf.add(_loss_1, _loss_2, name=LOSS)

    # predicted label (fetched by name; the returned tensor is not kept)
    tf.cast(tf.argmax(y, 1), tf.int32, name=LABEL_PREDICT)

    # common post-processing
    self.build_optimizer(loss=loss)
    self.set_graph(graph=tf.get_default_graph())
def build_neural_network(self, lookup_table):
    """Build the TF graph for a single-sequence classifier.

    Token ids are embedded (with gaussian noise), encoded by a GRU, and the
    final state is classified either with a softmax over output_dim classes
    or — when config.binary_classification is set — with a single sigmoid
    unit trained with binary cross-entropy.

    :param lookup_table: initial embedding matrix; rebound as a tf.Variable.
    """
    test_mode = tf.placeholder(tf.int8, None, name=TEST_MODE)
    label_gold = tf.placeholder(tf.int32, [None, ], name=LABEL_GOLD)
    token_id_seq = tf.placeholder(
        tf.int32, [self.config.batch_size, self.config.seq_len],
        name=TOKEN_ID_SEQ)
    seq_len = tf.placeholder(tf.int32, [None, ], name=SEQ_LEN)
    sample_weights = tf.placeholder(tf.float32, [None, ], name=SAMPLE_WEIGHTS)
    lookup_table = tf.Variable(lookup_table,
                               dtype=tf.float32,
                               name=LOOKUP_TABLE,
                               trainable=self.config.embedding_trainable)
    embedded = tf.nn.embedding_lookup(lookup_table, token_id_seq)
    # Noise regularises the embeddings; disabled in test mode.
    embedded = add_gaussian_noise_layer(
        embedded,
        stddev=self.config.embedding_noise_stddev,
        test_mode=test_mode)
    dropout_keep_prob = build_dropout_keep_prob(
        keep_prob=self.config.dropout_keep_prob, test_mode=test_mode)
    rnn_outputs, rnn_last_states = tf.nn.dynamic_rnn(
        rnn_cell.build_gru(self.config.rnn_dim,
                           dropout_keep_prob=dropout_keep_prob),
        inputs=embedded,
        sequence_length=seq_len,
        dtype=tf.float32)
    dense_input = tf.concat([rnn_last_states, ], axis=1, name=HIDDEN_FEAT)

    if not self.config.binary_classification:
        y, w, b = dense.build(dense_input,
                              dim_output=self.config.output_dim,
                              output_name=PROB_PREDICT)
        # compute loss: weighted softmax cross-entropy over classes
        _loss_1 = tf.reduce_mean(
            tf.losses.sparse_softmax_cross_entropy(logits=y,
                                                   labels=label_gold,
                                                   weights=sample_weights))
        # predicted label: argmax over class scores
        tf.cast(tf.argmax(y, 1), tf.int32, name=LABEL_PREDICT)
    else:
        y, w, b = dense.build(dense_input,
                              dim_output=1,
                              activation=tf.nn.sigmoid)
        # BUGFIX: the original computed y*log(label) + (1-y)*log(1-label),
        # i.e. it took the log of the integer labels instead of the sigmoid
        # output, fed an int32 tensor to clip_by_value with float bounds,
        # and broadcast [batch, 1] against [batch] into a [batch, batch]
        # matrix.  Correct binary cross-entropy is label*log(p)+(1-l)*log(1-p)
        # on matching [batch] shapes.
        y_flat = tf.squeeze(y, axis=1)
        gold = tf.cast(label_gold, tf.float32)
        _loss_1 = -tf.reduce_mean(
            gold * tf.log(tf.clip_by_value(y_flat, 1e-10, 1.0)) +
            (1. - gold) * tf.log(tf.clip_by_value(1. - y_flat, 1e-10, 1.0)))
        # NOTE(review): sample_weights is not applied in this branch,
        # matching the original behaviour — confirm whether that is intended.
        # BUGFIX: argmax over a 1-wide output is always 0; threshold the
        # sigmoid probability at 0.5 instead.
        tf.cast(tf.greater(y_flat, 0.5), tf.int32, name=LABEL_PREDICT)

    _loss_2 = tf.constant(0., dtype=tf.float32)
    if self.config.l2_reg_lambda is not None and self.config.l2_reg_lambda > 0:
        _loss_2 += self.config.l2_reg_lambda * tf.nn.l2_loss(w)
    loss = tf.add(_loss_1, _loss_2, name=LOSS)

    # common post-processing
    self.build_optimizer(loss=loss)
    self.set_graph(graph=tf.get_default_graph())