import tensorflow as tf


def bag_cross_max(x, scope, rel_tot, is_training, var_scope=None,
                  dropout_before=False, keep_prob=1.0):
    """
    Cross-sentence max-pooling proposed by Jiang et al. (2016),
    "Relation Extraction with Multi-instance Multi-label Convolutional Neural Networks".
    https://pdfs.semanticscholar.org/8731/369a707046f3f8dd463d1fd107de31d40a24.pdf
    """
    with tf.variable_scope(var_scope or "cross_max", reuse=tf.AUTO_REUSE):
        if dropout_before:
            x = dropout(x, keep_prob)
        bag_repre = []
        for i in range(scope.shape[0] - 1):
            bag_hidden_mat = x[scope[i]:scope[i + 1]]
            bag_repre.append(tf.reduce_max(bag_hidden_mat, 0))  # (n', hidden_size) -> (hidden_size)
        bag_repre = tf.stack(bag_repre)
        if not dropout_before:
            bag_repre = dropout(bag_repre, keep_prob)
        bag_logit = _logit(bag_repre, rel_tot)
        if not is_training:
            bag_logit = tf.nn.softmax(bag_logit)
        return bag_logit, bag_repre
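

# Hedged usage sketch for the bag selectors in this module: `scope` is assumed to be an
# integer array of bag boundaries, so bag i covers rows scope[i]:scope[i + 1] of the
# instance matrix x. The demo function, its tensor sizes, and rel_tot=53 are illustrative
# only and rely on this module's `dropout` and `_logit` helpers.
def _demo_scope_sketch():
    import numpy as np
    hidden = tf.constant(np.random.randn(5, 230), dtype=tf.float32)  # 5 encoded instances
    scope = np.array([0, 2, 5])  # two bags: instances 0-1 and instances 2-4
    bag_logit, bag_repre = bag_cross_max(hidden, scope, rel_tot=53, is_training=True)
    return bag_logit, bag_repre  # (2, 53) bag logits and (2, 230) bag representations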


def bag_attention(x, scope, instance_label, rel_tot, is_training, var_scope=None,
                  dropout_before=False, keep_prob=1.0):
    """Selective attention over the instances in each bag (cf. Lin et al., 2016)."""
    with tf.variable_scope(var_scope or "bag_attention", reuse=tf.AUTO_REUSE):
        if is_training:  # training
            if dropout_before:
                x = dropout(x, keep_prob)
            bag_repre = []
            attention_logit = _attention_train_logit(x, instance_label, rel_tot)
            for i in range(scope.shape[0] - 1):
                bag_hidden_mat = x[scope[i]:scope[i + 1]]
                attention_score = tf.nn.softmax(attention_logit[scope[i]:scope[i + 1]], -1)
                # (1, n') x (n', hidden_size) = (1, hidden_size) -> (hidden_size)
                bag_repre.append(
                    tf.squeeze(tf.matmul(tf.expand_dims(attention_score, 0), bag_hidden_mat)))
            bag_repre = tf.stack(bag_repre)
            if not dropout_before:
                bag_repre = dropout(bag_repre, keep_prob)
            return _logit(bag_repre, rel_tot), bag_repre
        else:  # testing
            attention_logit = _attention_test_logit(x, rel_tot)  # (n, rel_tot)
            bag_repre = []
            bag_logit = []
            for i in range(scope.shape[0] - 1):
                bag_hidden_mat = x[scope[i]:scope[i + 1]]
                attention_score = tf.nn.softmax(
                    tf.transpose(attention_logit[scope[i]:scope[i + 1], :]), -1)  # softmax over (rel_tot, n')
                bag_repre_for_each_rel = tf.matmul(
                    attention_score, bag_hidden_mat)  # (rel_tot, n') x (n', hidden_size) = (rel_tot, hidden_size)
                bag_logit_for_each_rel = _logit(bag_repre_for_each_rel, rel_tot)  # -> (rel_tot, rel_tot)
                bag_repre.append(bag_repre_for_each_rel)
                bag_logit.append(
                    tf.diag_part(tf.nn.softmax(bag_logit_for_each_rel, -1)))  # could be improved by sigmoid?
            bag_repre = tf.stack(bag_repre)
            bag_logit = tf.stack(bag_logit)
            return bag_logit, bag_repre
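

# Illustrative-only sketch of the test-time attention step in bag_attention: one bag
# representation is built per candidate relation, every representation is scored against
# all relations, and only the diagonal of the resulting (rel_tot, rel_tot) score matrix is
# kept as the bag's per-relation confidence. The numbers below are made up.
def _demo_attention_diag_sketch():
    import numpy as np
    scores = np.array([[0.7, 0.2, 0.1],   # bag vector built for relation 0, scored over all 3 relations
                       [0.1, 0.8, 0.1],   # bag vector built for relation 1
                       [0.3, 0.3, 0.4]])  # bag vector built for relation 2
    return np.diag(scores)  # [0.7, 0.8, 0.4], analogous to tf.diag_part above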


def cnn(x, mask=None, hidden_size=230, kernel_size=3, stride_size=1,
        activation=tf.nn.relu, var_scope=None, keep_prob=1.0):
    # Plain CNN encoder; becomes a PCNN when a piecewise-pooling mask is provided.
    with tf.variable_scope(var_scope or ('cnn' if mask is None else 'pcnn'), reuse=tf.AUTO_REUSE):
        cnn_cell = _cnn_cell(x, hidden_size, kernel_size, stride_size)
        pool = _pooling(cnn_cell) if mask is None else _piecewise_pooling(cnn_cell, mask)
        return dropout(activation(pool), keep_prob)


def bag_one(x, scope, label, rel_tot, is_training, var_scope=None,
            dropout_before=False, keep_prob=1.0):
    """Represent each bag by its single best-scoring instance."""
    with tf.variable_scope(var_scope or "maximum", reuse=tf.AUTO_REUSE):
        if is_training:  # training
            if dropout_before:
                x = dropout(x, keep_prob)
            bag_repre = []
            for i in range(scope.shape[0] - 1):
                bag_hidden_mat = x[scope[i]:scope[i + 1]]
                instance_logit = tf.nn.softmax(
                    _logit(bag_hidden_mat, rel_tot), -1)  # (n', hidden_size) -> (n', rel_tot)
                j = tf.argmax(instance_logit[:, label[i]], output_type=tf.int32)
                bag_repre.append(bag_hidden_mat[j])
            bag_repre = tf.stack(bag_repre)
            if not dropout_before:
                bag_repre = dropout(bag_repre, keep_prob)
            return _logit(bag_repre, rel_tot), bag_repre
        else:  # testing
            if dropout_before:
                x = dropout(x, keep_prob)
            bag_repre = []
            bag_logit = []
            for i in range(scope.shape[0] - 1):
                bag_hidden_mat = x[scope[i]:scope[i + 1]]
                instance_logit = tf.nn.softmax(
                    _logit(bag_hidden_mat, rel_tot), -1)  # (n', hidden_size) -> (n', rel_tot)
                bag_logit.append(tf.reduce_max(instance_logit, 0))
                bag_repre.append(bag_hidden_mat[0])  # fake max repre
            bag_logit = tf.stack(bag_logit)
            bag_repre = tf.stack(bag_repre)
            return tf.nn.softmax(bag_logit), bag_repre


def rnn(x, length, hidden_size=230, cell_name='', bidirectional=False,
        var_scope=None, keep_prob=1.0):
    with tf.variable_scope(var_scope or ('birnn' if bidirectional else 'rnn'), reuse=tf.AUTO_REUSE):
        x = dropout(x, keep_prob)
        if bidirectional:
            bw_states, fw_states = birnn_states(x, length, hidden_size, cell_name)
            return tf.concat([fw_states, bw_states], axis=1)
        else:
            cell = _rnn_cell(hidden_size, cell_name)
            _, states = tf.nn.dynamic_rnn(cell, x, sequence_length=length,
                                          dtype=tf.float32, scope='dynamic_rnn')
            if isinstance(states, tuple):
                states = states[0]  # LSTM-style cells return a state tuple; keep its first element
            return states


def bag_average(x, scope, rel_tot, is_training, var_scope=None,
                dropout_before=False, keep_prob=1.0):
    with tf.variable_scope(var_scope or "average", reuse=tf.AUTO_REUSE):
        if dropout_before:
            x = dropout(x, keep_prob)
        bag_repre = []
        for i in range(scope.shape[0] - 1):
            bag_hidden_mat = x[scope[i]:scope[i + 1]]
            bag_repre.append(tf.reduce_mean(bag_hidden_mat, 0))  # (n', hidden_size) -> (hidden_size)
        bag_repre = tf.stack(bag_repre)
        if not dropout_before:
            bag_repre = dropout(bag_repre, keep_prob)
        bag_logit = _logit(bag_repre, rel_tot)
        if not is_training:
            bag_logit = tf.nn.softmax(bag_logit)
        return bag_logit, bag_repre


def resnet(x, filters, length=None, cell_name='lstm', bidirectional=False, mask=None,
           ib_num=4, kernel_size=3, stride_size=1, activation=tf.nn.relu,
           var_scope=None, keep_prob=1.0):
    with tf.variable_scope(var_scope or ('resnet' if mask is None else 'resnet_pcnn'),
                           reuse=tf.AUTO_REUSE):
        seq = None if length is None else rnn(x, length, filters[1], cell_name,
                                              bidirectional, keep_prob=keep_prob)
        x = _cnn_cell(x, filters[1], kernel_size, stride_size, activation=activation)
        # x = tf.expand_dims(_pooling(x), axis=1) if mask is None else _piecewise_pooling(x, mask, True)
        for i in range(ib_num):
            h1 = _cnn_cell(x, filters[0], kernel_size, stride_size,
                           activation=activation, var_scope='conv_' + str(i) + 'a')
            h2 = _cnn_cell(h1, filters[1], kernel_size, stride_size,
                           activation=activation, var_scope='conv_' + str(i) + 'b')
            x = h2 + x  # residual (identity) connection
        x = _pooling(x) if mask is None else _piecewise_pooling(x, mask)
        # x = tf.squeeze(x) if mask is None else tf.reshape(x, [-1, x.shape[-1] * x.shape[-2]])
        # x = conv_block(x, kernel_size, [hidden_size, hidden_size, 256], stage=2, block='a',
        #                strides=(stride_size, stride_size))
        # x = identity_block(x, kernel_size, [hidden_size, hidden_size, 256], stage=2, block='b')
        # x = identity_block(x, kernel_size, [hidden_size, hidden_size, 256], stage=2, block='c')
        x = dropout(activation(x), keep_prob)
        return x if seq is None else tf.concat([seq, x], axis=1)


def instance(x, rel_tot, var_scope=None, keep_prob=1.0):
    # Sentence-level (no bag) classification: one logit vector per instance.
    with tf.variable_scope(var_scope or "instance", reuse=tf.AUTO_REUSE):
        x = dropout(x, keep_prob)
        return _logit(x, rel_tot), x
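

# Hedged end-to-end sketch of how an encoder and a bag selector from this module are
# typically wired together; the argument names, shapes, keep_prob, and rel_tot=53 are
# assumptions for illustration, not part of the original file.
def _demo_pcnn_att_sketch(embedding, mask, scope, instance_label, rel_tot=53, is_training=True):
    # embedding: (num_instances, max_len, emb_dim) word + position embeddings
    # mask:      piecewise mask consumed by _piecewise_pooling
    # scope:     bag boundaries; bag i covers instances scope[i]:scope[i + 1]
    keep_prob = 0.5 if is_training else 1.0
    sent_repre = cnn(embedding, mask, hidden_size=230, keep_prob=keep_prob)  # PCNN encoder
    bag_logit, bag_repre = bag_attention(sent_repre, scope, instance_label, rel_tot,
                                         is_training, keep_prob=keep_prob)
    return bag_logit, bag_repre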