def build_rnn(rnn_config, dropout=None):
    # only apply dropout when the config asks for it
    _dropout_keep_prob = dropout if rnn_config['use_dropout'] else None
    if rnn_config['type'] == 'gru':
        return rnn_cell.build_gru(rnn_config['dim'],
                                  dropout_keep_prob=_dropout_keep_prob)
    elif rnn_config['type'] == 'lstm':
        return rnn_cell.build_lstm(rnn_config['dim'],
                                   dropout_keep_prob=_dropout_keep_prob)
    else:
        raise ValueError('unsupported rnn type in config: {}'.format(rnn_config))
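# Usage sketch for build_rnn (hypothetical values; rnn_cell.build_gru and
# rnn_cell.build_lstm are this project's own cell factories, and the config
# keys below are exactly the ones the branches above read):
#
#     gru_cell = build_rnn({'type': 'gru', 'dim': 128, 'use_dropout': True},
#                          dropout=0.8)
#     lstm_cell = build_rnn({'type': 'lstm', 'dim': 256, 'use_dropout': False})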
def build_neural_network(self, lookup_table):
    # placeholders shared by train and test graphs
    test_mode = tf.placeholder(tf.int8, None, name=TEST_MODE)
    label_gold = tf.placeholder(tf.int32, [None], name=LABEL_GOLD)
    sample_weights = tf.placeholder(tf.float32, [None], name=SAMPLE_WEIGHTS)
    lookup_table = tf.Variable(lookup_table,
                               dtype=tf.float32,
                               name=LOOKUP_TABLE,
                               trainable=self.config.embedding_trainable)
    dropout_keep_prob = build_dropout_keep_prob(
        keep_prob=self.config.dropout_keep_prob, test_mode=test_mode)

    tid = tf.placeholder(tf.int32,
                         [self.config.batch_size, self.config.seq_len],
                         name=TID)
    seq_len = tf.placeholder(tf.int32, [None], name=SEQ_LEN)
    embedded = tf.nn.embedding_lookup(lookup_table, tid)

    # optional noise on the embeddings
    if self.config.embedding_noise_type is None:
        pass
    elif self.config.embedding_noise_type == 'gaussian':
        embedded = add_gaussian_noise_layer(
            embedded,
            stddev=self.config.embedding_noise_stddev,
            test_mode=test_mode)
    elif self.config.embedding_noise_type == 'dropout':
        emb_dropout_keep_prob = build_dropout_keep_prob(
            keep_prob=self.config.embedding_dropout_keep_prob,
            test_mode=test_mode)
        embedded = tf.nn.dropout(embedded, emb_dropout_keep_prob)
    else:
        raise Exception('unknown embedding noise type: {}'.format(
            self.config.embedding_noise_type))

    # stacked bidirectional LSTM layers
    outputs = embedded
    last_state = None
    for i, rnn_dim in enumerate(self.config.rnns):
        with tf.variable_scope('layer_{}'.format(i)):
            cell_fw = rnn_cell.build_lstm(rnn_dim,
                                          dropout_keep_prob=dropout_keep_prob)
            cell_bw = rnn_cell.build_lstm(rnn_dim,
                                          dropout_keep_prob=dropout_keep_prob)
            outputs, output_states = tf.nn.bidirectional_dynamic_rnn(
                cell_fw, cell_bw, outputs, seq_len,
                cell_fw.zero_state(self.config.batch_size, tf.float32),
                cell_bw.zero_state(self.config.batch_size, tf.float32))
            # concatenate forward and backward outputs / states
            outputs = tf.concat(outputs, axis=-1)
            last_state = tf.concat([states[0] for states in output_states],
                                   axis=-1)

    # optionally replace the last state with an attention-pooled summary
    if self.config.use_attention:
        last_state, _ = attention.build(outputs, self.config.attention_dim)

    dense_input = tf.concat([last_state], axis=1, name=HIDDEN_FEAT)
    dense_input = tf.nn.dropout(dense_input, keep_prob=dropout_keep_prob)

    # fully-connected stack with optional per-layer l2 penalties
    l2_component = None
    for conf in self.config.dense_layers:
        dense_input, w, _ = dense.build(
            dense_input,
            dim_output=conf['dim'],
            activation=getattr(tf.nn, conf['activation']))
        if conf.get('l2', 0.) > 0:
            comp = conf['l2'] * tf.nn.l2_loss(w)
            l2_component = comp if l2_component is None else l2_component + comp

    # output layer: plain softmax logits, or a max-out head per class
    l2_w_list = list()
    if self.config.max_out is None:
        y, w, b = dense.build(dense_input,
                              dim_output=self.config.output_dim,
                              output_name=PROB_PREDICT)
        l2_w_list.append(w)
    else:
        y_list = list()
        for dim in self.config.max_out:
            y, w, b = dense.build(dense_input, dim_output=dim)
            y = tf.expand_dims(tf.reduce_max(y, 1), axis=1)
            y_list.append(y)
            l2_w_list.append(w)
        y = tf.concat(y_list, axis=1, name=PROB_PREDICT)

    # compute the loss: weighted cross-entropy plus l2 regularization
    _loss_1 = tf.reduce_mean(
        tf.losses.sparse_softmax_cross_entropy(logits=y,
                                               labels=label_gold,
                                               weights=sample_weights))
    _loss_2 = tf.constant(0., dtype=tf.float32)
    if self.config.l2_reg_lambda is not None and self.config.l2_reg_lambda > 0:
        for w in l2_w_list:
            _loss_2 += self.config.l2_reg_lambda * tf.nn.l2_loss(w)
    if l2_component is not None:
        _loss_2 += l2_component
    loss = tf.add(_loss_1, _loss_2, name=LOSS)

    # predicted labels
    tf.cast(tf.argmax(y, 1), tf.int32, name=LABEL_PREDICT)

    # shared post-processing
    self.build_optimizer(loss=loss)
    self.set_graph(graph=tf.get_default_graph())
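# Feed-dict sketch for the graph built above (illustrative only: the name
# constants TEST_MODE, TID, SEQ_LEN, LABEL_GOLD and SAMPLE_WEIGHTS are this
# project's placeholder names; sess, graph, train_op, loss_tensor and the
# batch arrays are hypothetical):
#
#     feed = {
#         graph.get_tensor_by_name('{}:0'.format(TID)): batch_token_ids,
#         graph.get_tensor_by_name('{}:0'.format(SEQ_LEN)): batch_seq_len,
#         graph.get_tensor_by_name('{}:0'.format(LABEL_GOLD)): batch_labels,
#         graph.get_tensor_by_name('{}:0'.format(SAMPLE_WEIGHTS)): batch_weights,
#         graph.get_tensor_by_name('{}:0'.format(TEST_MODE)): 0,  # 0: train
#     }
#     _, batch_loss = sess.run([train_op, loss_tensor], feed_dict=feed)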
def build_neural_network(self, lookup_table):
    # placeholders
    test_mode = tf.placeholder(tf.int8, None, name=TEST_MODE)
    label_gold = tf.placeholder(tf.int32, [None], name=LABEL_GOLD)
    token_id_seq = tf.placeholder(
        tf.int32, [self.config.batch_size, self.config.seq_len],
        name=TOKEN_ID_SEQ)
    seq_len = tf.placeholder(tf.int32, [None], name=SEQ_LEN)
    sample_weights = tf.placeholder(tf.float32, [None], name=SAMPLE_WEIGHTS)
    lookup_table = tf.Variable(lookup_table,
                               dtype=tf.float32,
                               name=LOOKUP_TABLE,
                               trainable=self.config.embedding_trainable)

    embedded = tf.nn.embedding_lookup(lookup_table, token_id_seq)
    embedded = add_gaussian_noise_layer(
        embedded,
        stddev=self.config.embedding_noise_stddev,
        test_mode=test_mode)
    dropout_keep_prob = build_dropout_keep_prob(
        keep_prob=self.config.dropout_keep_prob, test_mode=test_mode)

    # two stacked bidirectional LSTM layers
    with tf.variable_scope('blstm_layer_1'):
        cell_fw = rnn_cell.build_lstm(self.config.rnn_dim,
                                      dropout_keep_prob=dropout_keep_prob)
        cell_bw = rnn_cell.build_lstm(self.config.rnn_dim,
                                      dropout_keep_prob=dropout_keep_prob)
        outputs, output_states = tf.nn.bidirectional_dynamic_rnn(
            cell_fw, cell_bw, embedded, seq_len,
            cell_fw.zero_state(self.config.batch_size, tf.float32),
            cell_bw.zero_state(self.config.batch_size, tf.float32))
        outputs = tf.concat(outputs, axis=-1)

    with tf.variable_scope('blstm_layer_2'):
        cell_fw = rnn_cell.build_lstm(self.config.rnn_dim,
                                      dropout_keep_prob=dropout_keep_prob)
        cell_bw = rnn_cell.build_lstm(self.config.rnn_dim,
                                      dropout_keep_prob=dropout_keep_prob)
        outputs, output_states = tf.nn.bidirectional_dynamic_rnn(
            cell_fw, cell_bw, outputs, seq_len,
            cell_fw.zero_state(self.config.batch_size, tf.float32),
            cell_bw.zero_state(self.config.batch_size, tf.float32))
        outputs = tf.concat(outputs, axis=-1)

    # attention pooling over the second layer's outputs
    attention_output, _ = attention.build(outputs, self.config.attention_dim)
    dense_input = tf.concat([attention_output], axis=1, name=HIDDEN_FEAT)

    if not self.config.binary_classification:
        y, w, b = dense.build(dense_input,
                              dim_output=self.config.output_dim,
                              output_name=PROB_PREDICT)
        # weighted softmax cross-entropy loss
        _loss_1 = tf.reduce_mean(
            tf.losses.sparse_softmax_cross_entropy(logits=y,
                                                   labels=label_gold,
                                                   weights=sample_weights))
    else:
        y, w, b = dense.build(dense_input,
                              dim_output=1,
                              activation=tf.nn.sigmoid)
        # binary cross-entropy between the sigmoid output and the gold label
        y_flat = tf.squeeze(y, axis=1)
        label_float = tf.cast(label_gold, tf.float32)
        _loss_1 = -tf.reduce_mean(
            label_float * tf.log(tf.clip_by_value(y_flat, 1e-10, 1.0)) +
            (1. - label_float) *
            tf.log(tf.clip_by_value(1. - y_flat, 1e-10, 1.0)))

    _loss_2 = tf.constant(0., dtype=tf.float32)
    if self.config.l2_reg_lambda is not None and self.config.l2_reg_lambda > 0:
        _loss_2 += self.config.l2_reg_lambda * tf.nn.l2_loss(w)
    loss = tf.add(_loss_1, _loss_2, name=LOSS)

    # predicted labels: argmax over class scores, or a 0.5 threshold on the
    # sigmoid output in the binary case
    if not self.config.binary_classification:
        tf.cast(tf.argmax(y, 1), tf.int32, name=LABEL_PREDICT)
    else:
        tf.cast(tf.greater(tf.squeeze(y, axis=1), 0.5), tf.int32,
                name=LABEL_PREDICT)

    # shared post-processing
    self.build_optimizer(loss=loss)
    self.set_graph(graph=tf.get_default_graph())
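# Numeric sanity sketch for the binary branch above (pure numpy, hypothetical
# values): for gold label t and sigmoid output p, the per-sample loss is
# -(t*log(p) + (1-t)*log(1-p)), with the same clipping as the graph code.
#
#     import numpy as np
#     p = np.array([0.9, 0.2])    # sigmoid outputs
#     t = np.array([1.0, 0.0])    # gold labels
#     bce = -np.mean(t * np.log(np.clip(p, 1e-10, 1.0)) +
#                    (1 - t) * np.log(np.clip(1 - p, 1e-10, 1.0)))
#     # bce is approximately 0.164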
def build_neural_network(self, lookup_table):
    # placeholders shared across all three turns
    test_mode = tf.placeholder(tf.int8, None, name=TEST_MODE)
    label_gold = tf.placeholder(tf.int32, [None], name=LABEL_GOLD)
    sample_weights = tf.placeholder(tf.float32, [None], name=SAMPLE_WEIGHTS)
    lookup_table = tf.Variable(lookup_table,
                               dtype=tf.float32,
                               name=LOOKUP_TABLE,
                               trainable=self.config.embedding_trainable)
    dropout_keep_prob = build_dropout_keep_prob(
        keep_prob=self.config.dropout_keep_prob, test_mode=test_mode)

    # one placeholder / length / embedding triple per dialogue turn
    tid_ = [None] * 3
    seq_len_ = [None] * 3
    embedded_ = [None] * 3
    for i in range(3):
        tid_[i] = tf.placeholder(
            tf.int32, [self.config.batch_size, self.config.seq_len],
            name=TID_[i])
        seq_len_[i] = tf.placeholder(tf.int32, [None], name=SEQ_LEN_[i])
        embedded_[i] = tf.nn.embedding_lookup(lookup_table, tid_[i])

    # optional noise on the embeddings
    if self.config.embedding_noise_type is None:
        pass
    elif self.config.embedding_noise_type == 'gaussian':
        for i in range(3):
            embedded_[i] = add_gaussian_noise_layer(
                embedded_[i],
                stddev=self.config.embedding_noise_stddev,
                test_mode=test_mode)
    elif self.config.embedding_noise_type == 'dropout':
        emb_dropout_keep_prob = build_dropout_keep_prob(
            keep_prob=self.config.embedding_dropout_keep_prob,
            test_mode=test_mode)
        for i in range(3):
            embedded_[i] = tf.nn.dropout(embedded_[i], emb_dropout_keep_prob)
    else:
        raise Exception('unknown embedding noise type: {}'.format(
            self.config.embedding_noise_type))

    # encode each turn with its own LSTM (separate variable scopes)
    last_states = list()
    for i in range(3):
        with tf.variable_scope('turn{}'.format(i)):
            outputs, last_state = tf.nn.dynamic_rnn(
                rnn_cell.build_lstm(self.config.rnn_dim,
                                    dropout_keep_prob=dropout_keep_prob),
                inputs=embedded_[i],
                sequence_length=seq_len_[i],
                dtype=tf.float32)
            last_state = last_state[0]
            if self.config.use_attention:
                last_state, _ = attention.build(outputs,
                                                self.config.attention_dim)
            last_states.append(last_state)

    # dropout on each turn's summary, then concatenate into one feature vector
    last_states = [
        tf.nn.dropout(state, keep_prob=dropout_keep_prob)
        for state in last_states
    ]
    dense_input = tf.concat(last_states, axis=1, name=HIDDEN_FEAT)

    # fully-connected stack with optional per-layer l2 penalties
    l2_component = None
    for conf in self.config.dense_layers:
        dense_input, w, _ = dense.build(
            dense_input,
            dim_output=conf['dim'],
            activation=getattr(tf.nn, conf['activation']))
        if conf.get('l2', 0.) > 0:
            comp = conf['l2'] * tf.nn.l2_loss(w)
            l2_component = comp if l2_component is None else l2_component + comp

    # output layer: plain softmax logits, or a max-out head per class
    l2_w_list = list()
    if self.config.max_out is None:
        y, w, b = dense.build(dense_input,
                              dim_output=self.config.output_dim,
                              output_name=PROB_PREDICT)
        l2_w_list.append(w)
    else:
        if len(self.config.max_out) != self.config.output_dim:
            raise ValueError(
                'invalid max_out config: len(max_out) must equal output_dim')
        y_list = list()
        for dim in self.config.max_out:
            y, w, b = dense.build(dense_input, dim_output=dim)
            y = tf.expand_dims(tf.reduce_max(y, 1), axis=1)
            y_list.append(y)
            l2_w_list.append(w)
        y = tf.concat(y_list, axis=1, name=PROB_PREDICT)

    # compute the loss: weighted cross-entropy plus l2 regularization
    _loss_1 = tf.reduce_mean(
        tf.losses.sparse_softmax_cross_entropy(logits=y,
                                               labels=label_gold,
                                               weights=sample_weights))
    _loss_2 = tf.constant(0., dtype=tf.float32)
    if self.config.l2_reg_lambda is not None and self.config.l2_reg_lambda > 0:
        for w in l2_w_list:
            _loss_2 += self.config.l2_reg_lambda * tf.nn.l2_loss(w)
    if l2_component is not None:
        _loss_2 += l2_component
    loss = tf.add(_loss_1, _loss_2, name=LOSS)

    # predicted labels
    tf.cast(tf.argmax(y, 1), tf.int32, name=LABEL_PREDICT)

    # shared post-processing
    self.build_optimizer(loss=loss)
    self.set_graph(graph=tf.get_default_graph())
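# Hypothetical config sketch for the dense / max-out head above. Each entry
# of dense_layers carries a width, an activation name resolved through
# getattr(tf.nn, ...) and an optional per-layer l2 weight; max_out lists one
# pool width per class, so its length must equal output_dim:
#
#     dense_layers = [{'dim': 300, 'activation': 'relu', 'l2': 1e-4},
#                     {'dim': 100, 'activation': 'tanh'}]
#     max_out = [32, 32, 32, 32]    # output_dim == 4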