def build(self, z, train):
    with tf.variable_scope(self.scope_name, reuse=tf.AUTO_REUSE):
        batch_size = tf.shape(z)[0]
        layers = [z]
        with tf.variable_scope("layer0"):
            layers.append(linear(layers[-1], 128))
            layers.append(tf.nn.relu(layers[-1]))
            layers.append(batch_norm()(layers[-1], train=train))
        with tf.variable_scope("layer1"):
            layers.append(linear(layers[-1], 4 * 4 * 64))
            layers.append(tf.nn.relu(layers[-1]))
            layers.append(batch_norm()(layers[-1], train=train))
            layers.append(tf.reshape(layers[-1], [-1, 4, 4, 64]))
        with tf.variable_scope("layer2"):
            layers.append(
                deconv2d(layers[-1],
                         [batch_size, 8, 8, 64],
                         d_h=self.stride, d_w=self.stride,
                         k_h=self.kernel, k_w=self.kernel))
            layers.append(lrelu(layers[-1]))
            layers.append(batch_norm()(layers[-1], train=train))
        with tf.variable_scope("layer3"):
            layers.append(
                deconv2d(layers[-1],
                         [batch_size, 16, 16, 32],
                         d_h=self.stride, d_w=self.stride,
                         k_h=self.kernel, k_w=self.kernel))
            layers.append(lrelu(layers[-1]))
            layers.append(batch_norm()(layers[-1], train=train))
        with tf.variable_scope("layer4"):
            layers.append(
                deconv2d(layers[-1],
                         [batch_size, 32, 32, 32],
                         d_h=self.stride, d_w=self.stride,
                         k_h=self.kernel, k_w=self.kernel))
            layers.append(lrelu(layers[-1]))
            layers.append(batch_norm()(layers[-1], train=train))
        with tf.variable_scope("layer5"):
            layers.append(
                deconv2d(layers[-1],
                         [batch_size, self.output_height,
                          self.output_width, self.output_depth],
                         d_h=self.stride, d_w=self.stride,
                         k_h=self.kernel, k_w=self.kernel))
            layers.append(tf.nn.sigmoid(layers[-1]))
        return layers[-1], layers
def build(self, images, train):
    with tf.variable_scope(self.scope_name, reuse=tf.AUTO_REUSE):
        layers = [images]
        with tf.variable_scope("layer0"):
            layers.append(conv2d(layers[-1], 32,
                                 d_h=self.stride, d_w=self.stride,
                                 k_h=self.kernel, k_w=self.kernel,
                                 sn_op=None))
            layers.append(lrelu(layers[-1]))
        with tf.variable_scope("layer1"):
            layers.append(conv2d(layers[-1], 32,
                                 d_h=self.stride, d_w=self.stride,
                                 k_h=self.kernel, k_w=self.kernel,
                                 sn_op=None))
            layers.append(lrelu(layers[-1]))
            layers.append(batch_norm()(layers[-1], train=train))
        with tf.variable_scope("layer2"):
            layers.append(conv2d(layers[-1], 64,
                                 d_h=self.stride, d_w=self.stride,
                                 k_h=self.kernel, k_w=self.kernel,
                                 sn_op=None))
            layers.append(lrelu(layers[-1]))
            layers.append(batch_norm()(layers[-1], train=train))
        with tf.variable_scope("layer3"):
            layers.append(conv2d(layers[-1], 64,
                                 d_h=self.stride, d_w=self.stride,
                                 k_h=self.kernel, k_w=self.kernel,
                                 sn_op=None))
            layers.append(lrelu(layers[-1]))
            layers.append(batch_norm()(layers[-1], train=train))
        with tf.variable_scope("layer4"):
            layers.append(linear(layers[-1], 128, sn_op=None))
            layers.append(lrelu(layers[-1]))
            layers.append(batch_norm()(layers[-1], train=train))
        with tf.variable_scope("layer5"):
            layers.append(linear(layers[-1], self.output_length))
        return layers[-1], layers
def forward(self,
            inputs: tf.Tensor,
            dim: int = None,
            scope: t.Union[str, tf.VariableScope] = None):
    scope = scope if scope else 'forward'
    kwargs = {
        'dim': dim if dim else self.project_dim,
        'keep_prob': self.keep_prob,
    }
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        t = op.linear(inputs, scope='linear-1', **kwargs)
        t = op.linear(t, scope='linear-2', **kwargs)
        return t
def forward(self,
            inputs: tf.Tensor,
            dim: int = None,
            scope: t.Union[str, tf.VariableScope] = None):
    scope = scope if scope else 'forward'
    kwargs = {
        'dim': dim if dim else -1,
        'keep_prob': self.keep_prob,
        'weight_init': tf.truncated_normal_initializer(stddev=0.01)
    }
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        t = op.linear(inputs, scope='linear-1', **kwargs)
        t = op.linear(t, scope='linear-2', **kwargs)
        return t
def build(self, image, train):
    with tf.variable_scope(self.scope_name, reuse=tf.AUTO_REUSE):
        layers = [image]
        with tf.variable_scope("layer0"):
            layers.append(conv2d(layers[-1], 32,
                                 d_h=self.stride, d_w=self.stride,
                                 k_h=self.kernel, k_w=self.kernel))
            layers.append(tf.nn.relu(layers[-1]))
        with tf.variable_scope("layer1"):
            layers.append(conv2d(layers[-1], 32,
                                 d_h=self.stride, d_w=self.stride,
                                 k_h=self.kernel, k_w=self.kernel))
            layers.append(tf.nn.relu(layers[-1]))
        with tf.variable_scope("layer2"):
            layers.append(conv2d(layers[-1], 64,
                                 d_h=self.stride, d_w=self.stride,
                                 k_h=self.kernel, k_w=self.kernel))
            layers.append(tf.nn.relu(layers[-1]))
        with tf.variable_scope("layer3"):
            layers.append(conv2d(layers[-1], 64,
                                 d_h=self.stride, d_w=self.stride,
                                 k_h=self.kernel, k_w=self.kernel))
            layers.append(tf.nn.relu(layers[-1]))
        with tf.variable_scope("layer4"):
            layers.append(linear(layers[-1], 128))
        with tf.variable_scope("layer5-mean"):
            mean = linear(layers[-1], self.output_length)
        with tf.variable_scope("layer5-logvar"):
            logvar = linear(layers[-1], self.output_length)
        layers.append(mean)
        layers.append(logvar)
        return mean, logvar, layers
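# A minimal sketch (not part of the encoder above) of how its (mean, logvar)
# outputs are typically turned into a latent sample via the reparameterization
# trick, assuming the usual VAE setup and TensorFlow 1.x imported as tf.
def sample_latent(mean, logvar):
    # z = mean + sigma * eps with eps ~ N(0, I); exp(0.5 * logvar) is sigma.
    eps = tf.random_normal(tf.shape(mean))
    return mean + tf.exp(0.5 * logvar) * eps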
def build(self, z, train):
    with tf.variable_scope(self.scope_name, reuse=tf.AUTO_REUSE):
        layers = [z]
        for i in range(self.num_layers - 1):
            with tf.variable_scope("layer{}".format(i)):
                layers.append(linear(layers[-1], self.l_dim))
                layers.append(lrelu(layers[-1]))
        with tf.variable_scope("layer{}".format(self.num_layers - 1)):
            layers.append(linear(layers[-1], 2))
            logit = layers[-1]
            layers.append(tf.nn.softmax(layers[-1]))
            prob = layers[-1]
        return logit, prob, layers
def fact_impl1(self, scope, x):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        # NOTE: project to low-dimensional space
        if self.fact_proj_dim > 0:
            x = op.linear(x, dim=self.fact_proj_dim, activation_fn=None)
        input_dim = x.get_shape()[2]
        fact_wght = op.get_variable('fact_weight', shape=(input_dim))
        fact_bias = op.get_variable('fact_bias', shape=(1))
        fact_intr = op.get_variable('fact_inter',
                                    shape=(input_dim, self.fact_intr_dim))
        l = (tf.reduce_sum(x * tf.reshape(fact_wght, [1, 1, -1]), -1)
             + fact_bias)
        # shape: [batch, seq_len]
        intr_mat = tf.matmul(fact_intr, tf.matrix_transpose(fact_intr))
        # shape: [input_dim, input_dim]
        mask = tf.sequence_mask(tf.range(input_dim),
                                maxlen=input_dim,
                                dtype=tf.float32)
        # shape: [input_dim, input_dim]
        p = tf.reduce_sum(
            tf.matmul(tf.expand_dims(x, 2), tf.expand_dims(x, 3)) *
            # shape: [batch, seq_len, input_dim, input_dim]
            tf.expand_dims(tf.expand_dims(intr_mat, 0), 0),
            # tf.expand_dims(tf.expand_dims(intr_mat * mask, 0), 0),
            # shape: [1, 1, input_dim, input_dim]
            [2, 3])
        # shape: [batch, seq_len]
        return l + p
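# A self-contained NumPy sketch (illustrative only, hypothetical names) of the
# factorization-machine interaction term computed by fact_impl1. The unmasked
# sum over all (i, j) pairs above equals ||V^T x||^2 with V = fact_inter; the
# classic FM term keeps only i < j (what the commented-out triangular mask
# would select), which is 0.5 * (||V^T x||^2 - sum_i ||v_i||^2 x_i^2) and
# avoids building the explicit input_dim x input_dim outer product.
import numpy as np

def fm_interaction_terms(x, V):
    # x: feature vector, shape [d]; V: interaction embeddings, shape [d, k]
    all_pairs = float(np.square(V.T @ x).sum())
    pairs_i_lt_j = 0.5 * (all_pairs -
                          float((np.square(V) * np.square(x)[:, None]).sum()))
    return all_pairs, pairs_i_lt_j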
def build(self, input_attribute, train):
    with tf.variable_scope(self.scope_name, reuse=tf.AUTO_REUSE):
        input_attribute = flatten(input_attribute)
        layers = [input_attribute]
        for i in range(self.num_layers - 1):
            with tf.variable_scope("layer{}".format(i)):
                layers.append(linear(layers[-1], self.num_units))
                layers.append(tf.nn.relu(layers[-1]))
                # if (i > 0):
                #     layers.append(batch_norm()(layers[-1], train=train))
        with tf.variable_scope("layer{}".format(self.num_layers - 1)):
            layers.append(linear(layers[-1], 1))
            # batch_size * 1
            layers.append(tf.squeeze(layers[-1], 1))
            # batch_size
        return layers[-1]
def build(self, image, train):
    with tf.variable_scope(self.scope_name, reuse=tf.AUTO_REUSE):
        layers = [image]
        with tf.variable_scope("layer0"):
            layers.append(
                conv2d(layers[-1], self.start_depth,
                       d_h=self.stride, d_w=self.stride,
                       k_h=self.kernel, k_w=self.kernel))
            layers.append(lrelu(layers[-1]))
        with tf.variable_scope("layer1"):
            layers.append(
                conv2d(layers[-1], self.start_depth * 2,
                       d_h=self.stride, d_w=self.stride,
                       k_h=self.kernel, k_w=self.kernel))
            layers.append(batch_norm()(layers[-1], train=train))
            layers.append(lrelu(layers[-1]))
        with tf.variable_scope("layer2"):
            layers.append(
                conv2d(layers[-1], self.start_depth * 4,
                       d_h=self.stride, d_w=self.stride,
                       k_h=self.kernel, k_w=self.kernel))
            layers.append(batch_norm()(layers[-1], train=train))
            layers.append(lrelu(layers[-1]))
        with tf.variable_scope("layer3"):
            layers.append(
                conv2d(layers[-1], self.start_depth * 8,
                       d_h=self.stride, d_w=self.stride,
                       k_h=self.kernel, k_w=self.kernel))
            layers.append(batch_norm()(layers[-1], train=train))
            layers.append(lrelu(layers[-1]))
        with tf.variable_scope("layer4"):
            layers.append(linear(layers[-1], self.output_length))
        return layers[-1], layers
def self_attent(x, padded_len):
    t1 = tf.tile(tf.expand_dims(x, 2), [1, 1, tf.shape(x)[1], 1])
    t2 = tf.tile(tf.expand_dims(x, 1), [1, tf.shape(x)[1], 1, 1])
    # shape: [batch, seq_len, seq_len, encode_dim]
    t = tf.reshape(tf.concat([t1, t2, t1 * t2], 3),
                   [batch_size, padded_len ** 2, 3 * self.encode_dim])
    # shape: [batch, seq_len^2, 3 * encode_dim]
    att = op.linear(t, dim=1, bias=None, activation_fn=None)
    # shape: [batch, seq_len^2, 1]
    att = tf.reshape(att, [batch_size, padded_len, padded_len])
    # shape: [batch, seq_len, seq_len]
    soft_align = tf.einsum('bik,bkj->bij', tf.nn.softmax(att), x)
    return op.gated_fuse(x, soft_align)
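# A minimal, self-contained sketch (assumed names, TensorFlow 1.x) of the same
# soft-alignment idea using ordinary scaled dot-product scores instead of the
# learned scoring layer above; it keeps the intermediate at O(L^2) instead of
# the O(L^2 * 3 * encode_dim) tiled-and-concatenated tensor.
import tensorflow as tf

def dot_product_self_attention(x):
    # x: [batch, seq_len, dim]
    dim = tf.cast(tf.shape(x)[-1], tf.float32)
    scores = tf.matmul(x, x, transpose_b=True) / tf.sqrt(dim)  # [batch, L, L]
    weights = tf.nn.softmax(scores)  # softmax over the last (key) axis
    return tf.matmul(weights, x)     # [batch, L, dim]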
def fact_impl2(self, scope, x):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        if self.fact_proj_dim > 0:
            x = op.linear(x, dim=self.fact_proj_dim, activation_fn=None)
        input_dim = x.get_shape()[2]
        fact_wght = op.get_variable('fact_weight', shape=(input_dim))
        fact_bias = op.get_variable('fact_bias', shape=(1))
        fact_intr = op.get_variable('fact_inter',
                                    shape=(input_dim, self.fact_intr_dim))
        l = (tf.reduce_sum(x * tf.reshape(fact_wght, [1, 1, -1]), -1)
             + fact_bias)
        # shape: [batch, seq_len]
        intr_mat = tf.matmul(fact_intr, tf.matrix_transpose(fact_intr))
        # shape: [input_dim, input_dim]
        mask = tf.sequence_mask(tf.range(input_dim),
                                maxlen=input_dim,
                                dtype=tf.float32)
        # shape: [input_dim, input_dim]
        i = tf.constant(0)
        x_shape = tf.shape(x)
        batch_size, seq_len = x_shape[0], x_shape[1]
        p = tf.reshape(tf.zeros([batch_size]), [batch_size, -1])

        def loop_cond(i, x, p, seq_len):
            return tf.less(i, seq_len)

        def loop_body(i, x, p, seq_len):
            x_vect = x[:, i]
            # shape: [batch, input_dim]
            # x_mat = tf.matmul(tf.expand_dims(x_vect, 1),
            #                   tf.expand_dims(x_vect, 2))
            # NOTE: Avoid Internal Error: Blas xGEMMBatched launch failed
            x_mat = (tf.tile(tf.expand_dims(x_vect, 1), [1, input_dim, 1]) *
                     tf.tile(tf.expand_dims(x_vect, 2), [1, 1, input_dim]))
            # shape: [batch, input_dim, input_dim]
            p_i = tf.reduce_sum(intr_mat * x_mat, [1, 2])
            p_i = tf.expand_dims(p_i, 1)
            # shape: [batch, 1]
            p = tf.concat([p, p_i], 1)
            # advance the loop counter so loop_cond eventually terminates
            return [i + 1, x, p, seq_len]

        _, _, p_loop, _ = tf.while_loop(loop_cond, loop_body,
                                        [i, x, p, seq_len],
                                        parallel_iterations=1)
        return l + p_loop[:, 1:]
def __init__(self,
             embeddings: embed.IndexedWordEmbedding,
             class_num: int,
             project_dim: int,
             ) -> None:
    super(ResModel, self).__init__()
    self._class_num = class_num
    self.project_dim = project_dim
    self.keep_prob = tf.placeholder(tf.float32, shape=[])

    def mask(x, x_len):
        # Explicitly mask the paddings.
        mask = tf.sequence_mask(x_len, tf.shape(x)[1], dtype=tf.float32)
        return tf.expand_dims(mask, -1)
    # mask1, mask2 = mask(self.x1, self.len1), mask(self.x2, self.len2)

    with tf.variable_scope('embed') as s:
        embed = tf.constant(embeddings.get_embeddings(),
                            dtype=tf.float32,
                            name='embeddings')
        x1, x2 = map(lambda x: tf.gather(embed, x), [self.x1, self.x2])

    for i in range(1, 11):
        x1, x2 = self.layer(x1, x2, 'res-layer-%d' % i)

    with tf.variable_scope('aggregate') as s:
        v = tf.concat([
            tf.reduce_max(x1, axis=1),
            tf.reduce_max(x2, axis=1),
            tf.reduce_sum(x1, axis=1),
            tf.reduce_sum(x2, axis=1)
        ], 1)
        y_hat = self.forward(v)
        y_hat = op.linear(y_hat, dim=self._class_num, activation_fn=None)
    self.evaluate_and_loss(y_hat)
def linear_BNReLU(x, scope):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        x = op.linear(x, dim=self.project_dim, activation_fn=None)
        x = tf.layers.batch_normalization(x)
        x = tf.nn.relu(x)
        return x
def build(self, image, train, sn_op):
    with tf.variable_scope(self.scope_name, reuse=tf.AUTO_REUSE):
        with tf.variable_scope("shared"):
            layers = [image]
            with tf.variable_scope("layer0"):
                layers.append(
                    conv2d(layers[-1], self.start_depth,
                           d_h=self.stride, d_w=self.stride,
                           k_h=self.kernel, k_w=self.kernel,
                           sn_op=sn_op))
                layers.append(lrelu(layers[-1]))
            with tf.variable_scope("layer1"):
                layers.append(
                    conv2d(layers[-1], self.start_depth * 2,
                           d_h=self.stride, d_w=self.stride,
                           k_h=self.kernel, k_w=self.kernel,
                           sn_op=sn_op))
                layers.append(lrelu(layers[-1]))
            with tf.variable_scope("layer2"):
                layers.append(
                    conv2d(layers[-1], self.start_depth * 4,
                           d_h=self.stride, d_w=self.stride,
                           k_h=self.kernel, k_w=self.kernel,
                           sn_op=sn_op))
                layers.append(lrelu(layers[-1]))
            with tf.variable_scope("layer3"):
                layers.append(
                    conv2d(layers[-1], self.start_depth * 8,
                           d_h=self.stride, d_w=self.stride,
                           k_h=self.kernel, k_w=self.kernel,
                           sn_op=sn_op))
                layers.append(lrelu(layers[-1]))
        with tf.variable_scope("x"):
            image_layers = [layers[-1]]
            with tf.variable_scope("layer0"):
                image_layers.append(
                    linear(image_layers[-1], 1, sn_op=sn_op))
                image_layers.append(tf.nn.sigmoid(image_layers[-1]))
        with tf.variable_scope("q"):
            q_layers = [layers[-1]]
            with tf.variable_scope("layer0"):
                q_layers.append(
                    linear(q_layers[-1], self.output_length, sn_op=sn_op))
        layers.extend(image_layers)
        layers.extend(q_layers)
        return image_layers[-1], q_layers[-1], layers
def __init__(self,
             embeddings: embed.IndexedWordEmbedding,
             class_num: int,
             scale_l1: float = 0.0,
             scale_l2: float = 0.0,
             lstm_unit: int = 300,
             ) -> None:
    super(Test, self).__init__()
    self._class_num = class_num
    self.scale_l1 = scale_l1
    self.scale_l2 = scale_l2
    self.lstm_unit = lstm_unit
    self.batch_size = tf.shape(self.x1)[0]
    self.embed_dim = embeddings.get_embeddings().shape[-1]
    self.keep_prob = tf.placeholder(tf.float32, shape=[])
    self.op_var_kwargs = {
        'scale_l1': self.scale_l1,
        'scale_l2': self.scale_l2
    }
    self.op_kwargs = {
        'scale_l1': self.scale_l1,
        'scale_l2': self.scale_l2,
        'keep_prob': self.keep_prob,
        'drop_after': False
    }

    with tf.variable_scope('embed') as s:
        # embed_init_var = embeddings.get_embeddings()
        # embed = op.get_variable('embeddings',
        #                         shape=embed_init_var.shape,
        #                         initializer=tf.constant_initializer(embed_init_var))
        embed = tf.constant(embeddings.get_embeddings(),
                            dtype=tf.float32,
                            name='embeddings')
        embed_dim = embed.get_shape()[-1]
        x1, x2 = map(lambda x: tf.gather(embed, x), [self.x1, self.x2])

    with tf.variable_scope('unfold', reuse=tf.AUTO_REUSE) as s:
        x1 = self.unfold_tree(x1, self.temp1, self.tag1, self.len1, 'x1')
        x2 = self.unfold_tree(x2, self.temp2, self.tag2, self.len2, 'x2')

    with tf.variable_scope('encode', reuse=tf.AUTO_REUSE) as s:
        x1, x2 = map(lambda x: tf.nn.dropout(x, self.keep_prob), [x1, x2])
        x1, x2 = map(self.bilstm, [x1, x2])
        # shape: [batch, seq_len, embed_dim * 2]

    with tf.variable_scope('attent') as s:
        sim = tf.matmul(x1, tf.matrix_transpose(x2))
        alpha = tf.matmul(tf.nn.softmax(tf.matrix_transpose(sim)), x1)
        beta = tf.matmul(tf.nn.softmax(sim), x2)
        x1 = tf.concat([x1, beta, x1 * beta, x1 - beta], 2)
        x2 = tf.concat([x2, alpha, x2 * alpha, x2 - alpha], 2)
        # shape: [batch, seq_len, embed_dim * 8]

    with tf.variable_scope('decode', reuse=tf.AUTO_REUSE) as s:
        x1, x2 = map(lambda x: op.linear(x, embed_dim, **self.op_kwargs),
                     [x1, x2])
        # NOTE: dropout here in the author's code
        # shape: [batch, seq_len, embed_dim]
        x1, x2 = map(self.bilstm, [x1, x2])
        # shape: [batch, seq_len, embed_dim * 2]

    with tf.variable_scope('aggregate') as s:
        def pool(x):
            return tf.concat(
                [tf.reduce_sum(x, axis=1), tf.reduce_max(x, axis=1)], 1)
        y_hat = op.linear(tf.concat([pool(x1), pool(x2)], 1),
                          dim=embed_dim,
                          activation_fn=tf.nn.tanh,
                          scope='linear-1',
                          **self.op_kwargs)
        # shape: [batch, embed_dim * 8]
        y_hat = op.linear(y_hat,
                          dim=self._class_num,
                          activation_fn=None,
                          scope='linear-2',
                          **self.op_kwargs)
        # shape: [batch, class_num]
    self.evaluate_and_loss(y_hat)
def co_attent(t1, t2):
    t1 = op.linear(t1, **op_kwargs)
    t2 = op.linear(t2, **op_kwargs)
    return tf.matmul(t1, tf.matrix_transpose(t2))
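# A minimal sketch (assumed shapes, not from this repo) of how a co-attention
# score matrix like the one returned above is typically turned into soft
# alignments: a softmax over each axis aligns every token of one sentence with
# a weighted average of the other sentence's tokens.
def soft_align_from_scores(att, x1, x2):
    # att: [batch, len1, len2]; x1: [batch, len1, d]; x2: [batch, len2, d]
    beta = tf.matmul(tf.nn.softmax(att), x2)                        # x2 aligned to each x1 token
    alpha = tf.matmul(tf.nn.softmax(tf.matrix_transpose(att)), x1)  # x1 aligned to each x2 token
    return alpha, beta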
def __init__(self,
             embeddings: embed.IndexedWordEmbedding,
             class_num: int,
             project_dim: int = 500,
             ) -> None:
    super(AttentiveModel, self).__init__()
    self.project_dim = project_dim
    self._class_num = class_num
    self.keep_prob = tf.placeholder(tf.float32, shape=[])

    def mask(x, x_len):
        # Explicitly mask the paddings.
        mask = tf.sequence_mask(x_len, tf.shape(x)[1], dtype=tf.float32)
        return tf.expand_dims(mask, -1)
    mask1, mask2 = mask(self.x1, self.len1), mask(self.x2, self.len2)

    with tf.variable_scope('embed') as s:
        embed = tf.constant(embeddings.get_embeddings(),
                            dtype=tf.float32,
                            name='embeddings')
        x1, x2 = map(lambda x: tf.gather(embed, x), [self.x1, self.x2])
        x1, x2 = map(lambda x: op.linear(x, scope='project', dim=500),
                     [x1, x2])
        x1, x2 = map(lambda x: op.highway(x, scope='highway-1', dim=500),
                     [x1, x2])
        x1, x2 = map(lambda x: op.highway(x, scope='highway-2', dim=500),
                     [x1, x2])

    with tf.variable_scope('attent') as s:
        def soft_align(att_fn, scope):
            with tf.variable_scope(scope) as s:
                sim = att_fn(x1, x2)
                alpha = tf.matmul(tf.nn.softmax(tf.matrix_transpose(sim)), x1)
                beta = tf.matmul(tf.nn.softmax(sim), x2)
                return alpha, beta
        a1, b1 = soft_align(self.attention_mul, 'mul')
        # a2, b2 = soft_align(self.attention_diff, 'diff')
        # a3, b3 = soft_align(self.attention_dist, 'dist')

    with tf.variable_scope('compare') as s:
        # v1 = self.forward(tf.concat([x1, b1], 2))
        # v2 = self.forward(tf.concat([x2, a1], 2))
        v1 = op.linear(tf.concat([x1, b1], 2), dim=500)
        v2 = op.linear(tf.concat([x2, a1], 2), dim=500)
        v1, v2 = map(lambda x: op.highway(x, scope='highway-3', dim=500),
                     [v1, v2])
        v1, v2 = map(lambda x: op.highway(x, scope='highway-4', dim=500),
                     [v1, v2])

    with tf.variable_scope('aggregate') as s:
        # CHANGE
        # def reduce_mean(x, x_len):
        #     return (tf.reduce_sum(x, axis=1) /
        #             tf.expand_dims(tf.cast(x_len, tf.float32), -1))
        def reduce_mean(x, x_len):
            return (tf.reduce_sum(x, axis=1) /
                    tf.cast(tf.shape(x)[1], tf.float32))
        v = tf.concat([
            # reduce_mean(v1, self.len1),
            # reduce_mean(v2, self.len2),
            tf.reduce_max(v1, axis=1),
            tf.reduce_max(v2, axis=1),
            tf.reduce_sum(v1, axis=1),
            tf.reduce_sum(v2, axis=1)
        ], 1)
        y_hat = self.forward(v)
        y_hat = self.linear(y_hat, dim=self._class_num)
    self.evaluate_and_loss(y_hat)
def compute(i, state, last_output, all_output,
            gen_flag, all_gen_flag, all_cur_argmax,
            last_cell_output):
    input_all = [all_discrete_attribute]
    if self.noise:
        input_all.append(feature_input_noise_reshape[i])
    if self.feed_back:
        if feature_input_data_dim == 3:
            input_all.append(feature_input_data_reshape[i])
        else:
            input_all.append(last_output)
    input_all = tf.concat(input_all, axis=1)
    cell_new_output, new_state = rnn_network(input_all, state)
    new_output_all = []
    id_ = 0
    for j in range(self.sample_len):
        for k in range(len(self.feature_outputs)):
            with tf.variable_scope("output{}".format(id_),
                                   reuse=tf.AUTO_REUSE):
                output = self.feature_outputs[k]
                sub_output = linear(cell_new_output, output.dim)
                if (output.type_ == OutputType.DISCRETE):
                    sub_output = tf.nn.softmax(sub_output)
                elif (output.type_ == OutputType.CONTINUOUS):
                    if (output.normalization == Normalization.ZERO_ONE):
                        sub_output = tf.nn.sigmoid(sub_output)
                    elif (output.normalization ==
                            Normalization.MINUSONE_ONE):
                        sub_output = tf.nn.tanh(sub_output)
                    else:
                        raise Exception("unknown normalization"
                                        " type")
                else:
                    raise Exception("unknown output type")
                new_output_all.append(sub_output)
                id_ += 1
    new_output = tf.concat(new_output_all, axis=1)

    for j in range(self.sample_len):
        all_gen_flag = all_gen_flag.write(
            i * self.sample_len + j, gen_flag)
        cur_gen_flag = tf.to_float(
            tf.equal(
                tf.argmax(
                    new_output_all[(j * len(self.feature_outputs) +
                                    self.gen_flag_id)],
                    axis=1),
                0))
        cur_gen_flag = tf.reshape(cur_gen_flag, [-1, 1])
        all_cur_argmax = all_cur_argmax.write(
            i * self.sample_len + j,
            tf.argmax(
                new_output_all[(j * len(self.feature_outputs) +
                                self.gen_flag_id)],
                axis=1))
        gen_flag = gen_flag * cur_gen_flag

    return (i + 1, new_state, new_output,
            all_output.write(i, new_output),
            gen_flag, all_gen_flag, all_cur_argmax,
            cell_new_output)
def build(self, attribute_input_noise, addi_attribute_input_noise,
          feature_input_noise, feature_input_data, train,
          attribute=None):
    # Generate attributes with an MLP, then generate features with an RNN
    # unrolled through tf.while_loop.
    with tf.variable_scope(self.scope_name, reuse=tf.AUTO_REUSE):
        batch_size = tf.shape(feature_input_noise)[0]

        if attribute is None:
            all_attribute = []
            all_discrete_attribute = []
            if len(self.addi_attribute_outputs) > 0:
                all_attribute_input_noise = \
                    [attribute_input_noise, addi_attribute_input_noise]
                all_attribute_outputs = \
                    [self.real_attribute_outputs, self.addi_attribute_outputs]
                all_attribute_part_name = \
                    [self.STR_REAL, self.STR_ADDI]
                all_attribute_out_dim = \
                    [self.real_attribute_out_dim, self.addi_attribute_out_dim]
            else:
                all_attribute_input_noise = [attribute_input_noise]
                all_attribute_outputs = [self.real_attribute_outputs]
                all_attribute_part_name = [self.STR_REAL]
                all_attribute_out_dim = [self.real_attribute_out_dim]
        else:
            all_attribute = [attribute]
            all_discrete_attribute = [attribute]
            if len(self.addi_attribute_outputs) > 0:
                all_attribute_input_noise = \
                    [addi_attribute_input_noise]
                all_attribute_outputs = \
                    [self.addi_attribute_outputs]
                all_attribute_part_name = \
                    [self.STR_ADDI]
                all_attribute_out_dim = [self.addi_attribute_out_dim]
            else:
                all_attribute_input_noise = []
                all_attribute_outputs = []
                all_attribute_part_name = []
                all_attribute_out_dim = []

        for part_i in range(len(all_attribute_input_noise)):
            with tf.variable_scope(
                    "attribute_{}".format(all_attribute_part_name[part_i]),
                    reuse=tf.AUTO_REUSE):
                if len(all_discrete_attribute) > 0:
                    layers = [
                        tf.concat([all_attribute_input_noise[part_i]] +
                                  all_discrete_attribute,
                                  axis=1)
                    ]
                else:
                    layers = [all_attribute_input_noise[part_i]]
                for i in range(self.attribute_num_layers - 1):
                    with tf.variable_scope("layer{}".format(i)):
                        layers.append(
                            linear(layers[-1], self.attribute_num_units))
                        layers.append(tf.nn.relu(layers[-1]))
                        layers.append(batch_norm()(layers[-1], train=train))
                with tf.variable_scope(
                        "layer{}".format(self.attribute_num_layers - 1),
                        reuse=tf.AUTO_REUSE):
                    part_attribute = []
                    part_discrete_attribute = []
                    for i in range(len(all_attribute_outputs[part_i])):
                        with tf.variable_scope("output{}".format(i),
                                               reuse=tf.AUTO_REUSE):
                            output = all_attribute_outputs[part_i][i]
                            sub_output_ori = linear(layers[-1], output.dim)
                            if (output.type_ == OutputType.DISCRETE):
                                sub_output = tf.nn.softmax(sub_output_ori)
                                sub_output_discrete = tf.one_hot(
                                    tf.argmax(sub_output, axis=1),
                                    output.dim)
                            elif (output.type_ == OutputType.CONTINUOUS):
                                if (output.normalization ==
                                        Normalization.ZERO_ONE):
                                    sub_output = tf.nn.sigmoid(
                                        sub_output_ori)
                                elif (output.normalization ==
                                        Normalization.MINUSONE_ONE):
                                    sub_output = tf.nn.tanh(sub_output_ori)
                                else:
                                    raise Exception("unknown normalization"
                                                    " type")
                                sub_output_discrete = sub_output
                            else:
                                raise Exception("unknown output type")
                            part_attribute.append(sub_output)
                            part_discrete_attribute.append(
                                sub_output_discrete)
                    part_attribute = tf.concat(part_attribute, axis=1)
                    part_discrete_attribute = tf.concat(
                        part_discrete_attribute, axis=1)
                    part_attribute = tf.reshape(
                        part_attribute,
                        [batch_size, all_attribute_out_dim[part_i]])
                    part_discrete_attribute = tf.reshape(
                        part_discrete_attribute,
                        [batch_size, all_attribute_out_dim[part_i]])
                    # batch_size * dim
                part_discrete_attribute = tf.stop_gradient(
                    part_discrete_attribute)
                all_attribute.append(part_attribute)
                all_discrete_attribute.append(part_discrete_attribute)

        all_attribute = tf.concat(all_attribute, axis=1)
        all_discrete_attribute = tf.concat(all_discrete_attribute, axis=1)
        all_attribute = tf.reshape(all_attribute,
                                   [batch_size, self.attribute_out_dim])
        all_discrete_attribute = tf.reshape(
            all_discrete_attribute,
            [batch_size, self.attribute_out_dim])

        with tf.variable_scope("feature", reuse=tf.AUTO_REUSE):
            all_cell = []
            for i in range(self.feature_num_layers):
                with tf.variable_scope("unit{}".format(i),
                                       reuse=tf.AUTO_REUSE):
                    cell = tf.nn.rnn_cell.LSTMCell(
                        num_units=self.feature_num_units,
                        state_is_tuple=True)
                    all_cell.append(cell)
            rnn_network = tf.nn.rnn_cell.MultiRNNCell(all_cell)

            feature_input_data_dim = \
                len(feature_input_data.get_shape().as_list())
            if feature_input_data_dim == 3:
                feature_input_data_reshape = tf.transpose(
                    feature_input_data, [1, 0, 2])
            feature_input_noise_reshape = tf.transpose(
                feature_input_noise, [1, 0, 2])
            # time * batch_size * ?

            if self.initial_state == RNNInitialStateType.ZERO:
                initial_state = rnn_network.zero_state(
                    batch_size, tf.float32)
            elif self.initial_state == RNNInitialStateType.RANDOM:
                initial_state = tf.random_normal(
                    shape=(self.feature_num_layers, 2,
                           batch_size, self.feature_num_units),
                    mean=0.0, stddev=1.0)
                initial_state = tf.unstack(initial_state, axis=0)
                initial_state = tuple([
                    tf.nn.rnn_cell.LSTMStateTuple(initial_state[idx][0],
                                                  initial_state[idx][1])
                    for idx in range(self.feature_num_layers)
                ])
            elif self.initial_state == RNNInitialStateType.VARIABLE:
                initial_state = []
                for i in range(self.feature_num_layers):
                    sub_initial_state1 = tf.get_variable(
                        "layer{}_initial_state1".format(i),
                        (1, self.feature_num_units),
                        initializer=tf.random_normal_initializer(
                            stddev=self.initial_stddev))
                    sub_initial_state1 = tf.tile(sub_initial_state1,
                                                 (batch_size, 1))
                    sub_initial_state2 = tf.get_variable(
                        "layer{}_initial_state2".format(i),
                        (1, self.feature_num_units),
                        initializer=tf.random_normal_initializer(
                            stddev=self.initial_stddev))
                    sub_initial_state2 = tf.tile(sub_initial_state2,
                                                 (batch_size, 1))
                    sub_initial_state = tf.nn.rnn_cell.LSTMStateTuple(
                        sub_initial_state1, sub_initial_state2)
                    initial_state.append(sub_initial_state)
                initial_state = tuple(initial_state)
            else:
                raise NotImplementedError

            time = feature_input_noise.get_shape().as_list()[1]
            if time is None:
                time = tf.shape(feature_input_noise)[1]

            def compute(i, state, last_output, all_output,
                        gen_flag, all_gen_flag, all_cur_argmax,
                        last_cell_output):
                input_all = [all_discrete_attribute]
                if self.noise:
                    input_all.append(feature_input_noise_reshape[i])
                if self.feed_back:
                    if feature_input_data_dim == 3:
                        input_all.append(feature_input_data_reshape[i])
                    else:
                        input_all.append(last_output)
                input_all = tf.concat(input_all, axis=1)
                cell_new_output, new_state = rnn_network(input_all, state)
                new_output_all = []
                id_ = 0
                for j in range(self.sample_len):
                    for k in range(len(self.feature_outputs)):
                        with tf.variable_scope("output{}".format(id_),
                                               reuse=tf.AUTO_REUSE):
                            output = self.feature_outputs[k]
                            sub_output = linear(cell_new_output, output.dim)
                            if (output.type_ == OutputType.DISCRETE):
                                sub_output = tf.nn.softmax(sub_output)
                            elif (output.type_ == OutputType.CONTINUOUS):
                                if (output.normalization ==
                                        Normalization.ZERO_ONE):
                                    sub_output = tf.nn.sigmoid(sub_output)
                                elif (output.normalization ==
                                        Normalization.MINUSONE_ONE):
                                    sub_output = tf.nn.tanh(sub_output)
                                else:
                                    raise Exception("unknown normalization"
                                                    " type")
                            else:
                                raise Exception("unknown output type")
                            new_output_all.append(sub_output)
                            id_ += 1
                new_output = tf.concat(new_output_all, axis=1)

                for j in range(self.sample_len):
                    all_gen_flag = all_gen_flag.write(
                        i * self.sample_len + j, gen_flag)
                    cur_gen_flag = tf.to_float(
                        tf.equal(
                            tf.argmax(
                                new_output_all[(j * len(self.feature_outputs) +
                                                self.gen_flag_id)],
                                axis=1),
                            0))
                    cur_gen_flag = tf.reshape(cur_gen_flag, [-1, 1])
                    all_cur_argmax = all_cur_argmax.write(
                        i * self.sample_len + j,
                        tf.argmax(
                            new_output_all[(j * len(self.feature_outputs) +
                                            self.gen_flag_id)],
                            axis=1))
                    gen_flag = gen_flag * cur_gen_flag

                return (i + 1, new_state, new_output,
                        all_output.write(i, new_output),
                        gen_flag, all_gen_flag, all_cur_argmax,
                        cell_new_output)

            (i, state, _, feature, _, gen_flag, cur_argmax,
             cell_output) = \
                tf.while_loop(
                    lambda a, b, c, d, e, f, g, h:
                        tf.logical_and(a < time,
                                       tf.equal(tf.reduce_max(e), 1)),
                    compute,
                    (0,
                     initial_state,
                     feature_input_data if feature_input_data_dim == 2
                     else feature_input_data_reshape[0],
                     tf.TensorArray(tf.float32, time),
                     tf.ones((batch_size, 1)),
                     tf.TensorArray(tf.float32, time * self.sample_len),
                     tf.TensorArray(tf.int64, time * self.sample_len),
                     tf.zeros((batch_size, self.feature_num_units))))

            def fill_rest(i, all_output, all_gen_flag, all_cur_argmax):
                all_output = all_output.write(
                    i, tf.zeros((batch_size, self.feature_out_dim)))
                for j in range(self.sample_len):
                    all_gen_flag = all_gen_flag.write(
                        i * self.sample_len + j,
                        tf.zeros((batch_size, 1)))
                    all_cur_argmax = all_cur_argmax.write(
                        i * self.sample_len + j,
                        tf.zeros((batch_size,), dtype=tf.int64))
                return (i + 1, all_output, all_gen_flag, all_cur_argmax)

            _, feature, gen_flag, cur_argmax = tf.while_loop(
                lambda a, b, c, d: a < time,
                fill_rest,
                (i, feature, gen_flag, cur_argmax))

            feature = feature.stack()
            # time * batch_size * (dim * sample_len)
            gen_flag = gen_flag.stack()
            # (time * sample_len) * batch_size * 1
            cur_argmax = cur_argmax.stack()

            gen_flag = tf.transpose(gen_flag, [1, 0, 2])
            # batch_size * (time * sample_len) * 1
            cur_argmax = tf.transpose(cur_argmax, [1, 0])
            # batch_size * (time * sample_len)
            length = tf.reduce_sum(gen_flag, [1, 2])
            # batch_size

            feature = tf.transpose(feature, [1, 0, 2])
            # batch_size * time * (dim * sample_len)
            gen_flag_t = tf.reshape(
                gen_flag, [batch_size, time, self.sample_len])
            # batch_size * time * sample_len
            gen_flag_t = tf.reduce_sum(gen_flag_t, [2])
            # batch_size * time
            gen_flag_t = tf.to_float(gen_flag_t > 0.5)
            gen_flag_t = tf.expand_dims(gen_flag_t, 2)
            # batch_size * time * 1
            gen_flag_t = tf.tile(gen_flag_t, [1, 1, self.feature_out_dim])
            # batch_size * time * (dim * sample_len)
            # zero out the parts after sequence ends
            feature = feature * gen_flag_t
            feature = tf.reshape(feature,
                                 [batch_size,
                                  time * self.sample_len,
                                  self.feature_out_dim // self.sample_len])
            # batch_size * (time * sample_len) * dim

            return feature, all_attribute, gen_flag, length, cur_argmax
def transit(x, v):
    gate = op.linear(x, activation_fn=tf.sigmoid, scope='gate')
    return v * gate + x * (1 - gate)
def __init__(self,
             embeddings: embed.IndexedWordEmbedding,
             class_num: int,
             scale_l1: float = 0.0,
             scale_l2: float = 0.000001,
             encode_dim: int = 300,
             fact_intr_dim: int = 10,
             fact_proj_dim: int = -1,
             char_filer_width: int = 5,
             char_embed_dim: int = 8,
             char_conv_dim: int = 100,
             lstm_unit: int = 300,
             ) -> None:
    super(CAFE, self).__init__()
    self._class_num = class_num
    self.scale_l1 = scale_l1
    self.scale_l2 = scale_l2
    self.encode_dim = encode_dim
    self.fact_proj_dim = fact_proj_dim
    self.fact_intr_dim = fact_intr_dim
    self.char_filter_width = char_filer_width
    self.char_embed_dim = char_embed_dim
    self.char_conv_dim = char_conv_dim
    self.lstm_unit = lstm_unit
    self.keep_prob = tf.placeholder(tf.float32, shape=[])
    op_kwargs = {
        'scale_l1': self.scale_l1,
        'scale_l2': self.scale_l2,
        'keep_prob': self.keep_prob
    }

    with tf.variable_scope('embed') as s:
        # Word pretrained embeddings (300D)
        word_embed = tf.constant(embeddings.get_embeddings(),
                                 dtype=tf.float32,
                                 name='word_embed')
        word_embed1, word_embed2 = map(lambda x: tf.gather(word_embed, x),
                                       [self.x1, self.x2])

        # Character convolutional embeddings (`char_conv_dim`D)
        char_embed = op.get_variable('char_embed',
                                     shape=(256, char_embed_dim))
        char_filter = op.get_variable('char_filter',
                                      shape=(1, self.char_filter_width,
                                             self.char_embed_dim,
                                             self.char_conv_dim))

        def embed_chars(x_char):
            embed = tf.gather(char_embed, x_char)
            # shape: [batch, seq_len, word_len, embed_dim]
            conv = tf.nn.conv2d(embed, char_filter, [1, 1, 1, 1], 'VALID')
            # shape: [batch, seq_len, word_len - filter_width + 1, conv_dim]
            return tf.reduce_max(conv, 2)
            # shape: [batch, seq_len, conv_dim]
        char_embed1, char_embed2 = map(embed_chars,
                                       [self.char1, self.char2])

        # Tag one-hot embeddings (72D)
        def embed_tags(x_ids, x_tags, x_len):
            x_tags *= tf.sequence_mask(x_len, tf.shape(x_tags)[1],
                                       dtype=tf.int32)
            # shape: [batch, seq_len]
            tag_embed = tf.one_hot(x_tags, data.SNLI.TAGS,
                                   dtype=tf.float32,
                                   name='char_embed')
            return tag_embed[:, :tf.shape(x_ids)[1]]
        tag_embed1, tag_embed2 = map(
            embed_tags, *zip((self.x1, self.tag1, self.len1),
                             (self.x2, self.tag2, self.len2)))

        # Merge embeddings
        x1 = tf.concat([word_embed1, char_embed1, tag_embed1], 2)
        x2 = tf.concat([word_embed2, char_embed2, tag_embed2], 2)

    with tf.variable_scope('encode') as s:
        def encode(x):
            x = op.highway(x, scope='hw-1', dim=self.encode_dim, **op_kwargs)
            x = op.highway(x, scope='hw-2', dim=self.encode_dim, **op_kwargs)
            return x
        x1, x2 = map(encode, [x1, x2])
        # shape: [batch, seq_len, encode_dim]

    with tf.variable_scope('attent') as s:
        # Alignment
        def co_attent(t1, t2):
            t1 = op.linear(t1, **op_kwargs)
            t2 = op.linear(t2, **op_kwargs)
            return tf.matmul(t1, tf.matrix_transpose(t2))
            # shape: [batch, seq_len1, seq_len2]

        with tf.variable_scope('inter-align') as s:
            att = co_attent(x1, x2)
            inter1 = tf.matmul(tf.nn.softmax(att), x2)
            inter2 = tf.matmul(tf.nn.softmax(tf.matrix_transpose(att)), x1)

        with tf.variable_scope('intra-align') as s:
            def self_attent(x):
                att = co_attent(x, x)
                return x * tf.reduce_sum(att, 2, keep_dims=True)
            intra1, intra2 = map(self_attent, [x1, x2])

        def align_fact(x, x_align, scope):
            with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
                f1 = self.fact('fact-concat', tf.concat([x, x_align], 2))
                f2 = self.fact('fact-sub', x - x_align)
                f3 = self.fact('fact-mul', x * x_align)
                return tf.stack([f1, f2, f3], 2)
                # shape: [batch, seq_len, 3]

        # TODO: variables may not be shared between different facts
        # x1 = tf.concat([x1, inter1, intra1], 2)
        # x2 = tf.concat([x2, inter2, intra2], 2)
        x1 = tf.concat([
            x1,
            align_fact(x1, inter1, 'inter'),
            align_fact(x1, intra1, 'intra')
        ], 2)
        x2 = tf.concat([
            x2,
            align_fact(x2, inter2, 'inter'),
            align_fact(x2, intra2, 'intra')
        ], 2)

    with tf.variable_scope('sequence', reuse=tf.AUTO_REUSE) as s:
        def lstm_encode(x):
            # shape: [batch, seq_len, encode_dim + 6]
            outputs, states = tf.nn.dynamic_rnn(
                cell=tf.nn.rnn_cell.LSTMCell(self.lstm_unit),
                inputs=x,
                dtype=tf.float32)
            outputs = tf.nn.dropout(outputs, self.keep_prob)
            return outputs
        x1, x2 = map(lstm_encode, [x1, x2])

    with tf.variable_scope('pooling') as s:
        def pool(x):
            return tf.concat([
                tf.reduce_max(x, axis=1),
                tf.reduce_sum(x, axis=1),
            ], 1)
            # shape: [batch, dim]
        x1, x2 = map(pool, [x1, x2])

    with tf.variable_scope('decode') as s:
        x = tf.concat([x1, x2, x1 - x2, x1 * x2], 1)
        x = op.highway(x, scope='hw-1', dim=self.encode_dim, **op_kwargs)
        x = op.highway(x, scope='hw-2', dim=self.encode_dim, **op_kwargs)
        y_hat = op.linear(x, dim=self._class_num, activation_fn=None)

    self.evaluate_and_loss(y_hat)
def __init__(self,
             embeddings: embed.IndexedWordEmbedding,
             class_num: int,
             scale_l1: float = 0.0,
             scale_l2: float = 0.0,
             lstm_unit: int = 300,
             seq_len: int = 0,
             char_filer_width: int = 5,
             char_embed_dim: int = 8,
             char_conv_dim: int = 100,
             class_weights: t.List[float] = [1.1, 1, 1],
             ) -> None:
    super(ESIM, self).__init__()
    self._class_num = class_num
    self.class_weights = class_weights
    self.scale_l1 = scale_l1
    self.scale_l2 = scale_l2
    self.lstm_unit = lstm_unit
    self.seq_len = seq_len
    self.char_filter_width = char_filer_width
    self.char_embed_dim = char_embed_dim
    self.char_conv_dim = char_conv_dim
    op_kwargs = {
        'scale_l1': self.scale_l1,
        'scale_l2': self.scale_l2,
        'keep_prob': self.keep_prob,
        'drop_after': False
    }

    with tf.variable_scope('embed') as s:
        def set_seq_len(x):
            x_len = tf.shape(x)[1]
            return tf.cond(
                tf.less(self.seq_len, x_len),
                lambda: x[:, :self.seq_len],
                lambda: tf.pad(x, [[0, 0], [0, self.seq_len - x_len]]))
        if self.seq_len > 0:
            x1, x2 = map(set_seq_len, [self.x1, self.x2])
        else:
            x1, x2 = self.x1, self.x2

        # embed_init_var = embeddings.get_embeddings()
        # embed = op.get_variable('embeddings',
        #                         shape=embed_init_var.shape,
        #                         initializer=tf.constant_initializer(embed_init_var))
        # embed = tf.constant(embeddings.get_embeddings(),
        #                     dtype=tf.float32,
        #                     name='embeddings')
        # x1, x2 = map(lambda x: tf.gather(embed, x), [x1, x2])

        # Word pretrained embeddings (300D)
        word_embed = tf.constant(embeddings.get_embeddings(),
                                 dtype=tf.float32,
                                 name='word_embed')
        word_embed1, word_embed2 = map(lambda x: tf.gather(word_embed, x),
                                       [self.x1, self.x2])
        embed_dim = word_embed.get_shape()[-1]

        # Character convolutional embeddings (`char_conv_dim`D)
        char_embed = op.get_variable('char_embed',
                                     shape=(256, char_embed_dim))
        char_filter = op.get_variable('char_filter',
                                      shape=(1, self.char_filter_width,
                                             self.char_embed_dim,
                                             self.char_conv_dim))

        def embed_chars(x_char):
            embed = tf.gather(char_embed, x_char)
            # shape: [batch, seq_len, word_len, embed_dim]
            conv = tf.nn.conv2d(embed, char_filter, [1, 1, 1, 1], 'VALID')
            # shape: [batch, seq_len, word_len - filter_width + 1, conv_dim]
            return tf.reduce_max(conv, 2)
            # shape: [batch, seq_len, conv_dim]
        char_embed1, char_embed2 = map(embed_chars,
                                       [self.char1, self.char2])

        # Tag one-hot embeddings (72D)
        def embed_tags(x_ids, x_tags, x_len):
            x_tags *= tf.sequence_mask(x_len, tf.shape(x_tags)[1],
                                       dtype=tf.int32)
            # shape: [batch, seq_len]
            tag_embed = tf.one_hot(x_tags, data.SNLI.TAGS,
                                   dtype=tf.float32,
                                   name='char_embed')
            return tag_embed[:, :tf.shape(x_ids)[1]]
        tag_embed1, tag_embed2 = map(
            embed_tags, *zip((self.x1, self.tag1, self.len1),
                             (self.x2, self.tag2, self.len2)))

        # Merge embeddings
        # x1 = tf.concat([word_embed1, char_embed1, tag_embed1], 2)
        # x2 = tf.concat([word_embed2, char_embed2, tag_embed2], 2)
        x1 = tf.concat([word_embed1, char_embed1], 2)
        x2 = tf.concat([word_embed2, char_embed2], 2)

        x1 = self.unfold_tree(x1, self.temp1, self.tag1, self.len1, 'x1')
        x2 = self.unfold_tree(x2, self.temp2, self.tag2, self.len2, 'x2')

    with tf.variable_scope('encode', reuse=tf.AUTO_REUSE) as s:
        x1, x2 = map(lambda x: tf.nn.dropout(x, self.keep_prob), [x1, x2])
        # import pdb; pdb.set_trace()
        x1, x2 = map(self.bilstm, [x1, x2])
        # shape: [batch, seq_len, embed_dim * 2]

    with tf.variable_scope('attent') as s:
        sim = tf.matmul(x1, tf.matrix_transpose(x2))
        alpha = tf.matmul(tf.nn.softmax(tf.matrix_transpose(sim)), x1)
        beta = tf.matmul(tf.nn.softmax(sim), x2)
        x1 = tf.concat([x1, beta, x1 * beta, x1 - beta], 2)
        x2 = tf.concat([x2, alpha, x2 * alpha, x2 - alpha], 2)
        # shape: [batch, seq_len, embed_dim * 8]

    with tf.variable_scope('decode', reuse=tf.AUTO_REUSE) as s:
        x1, x2 = map(lambda x: op.linear(x, dim=embed_dim, **op_kwargs),
                     [x1, x2])
        # NOTE: dropout here in the author's code
        # shape: [batch, seq_len, embed_dim]
        x1, x2 = map(self.bilstm, [x1, x2])
        # shape: [batch, seq_len, embed_dim * 2]

    with tf.variable_scope('aggregate') as s:
        def pool(x):
            return tf.concat(
                [tf.reduce_sum(x, axis=1), tf.reduce_max(x, axis=1)], 1)
        y_hat = op.linear(tf.concat([pool(x1), pool(x2)], 1),
                          dim=embed_dim,
                          activation_fn=tf.nn.tanh,
                          scope='linear-1',
                          **op_kwargs)
        # shape: [batch, embed_dim * 8]
        y_hat = op.linear(y_hat,
                          dim=self._class_num,
                          activation_fn=None,
                          scope='linear-2',
                          **op_kwargs)
        # shape: [batch, class_num]

    self.evaluate_and_loss(y_hat, self.class_weights)
def linear(self, inputs: tf.Tensor, dim: int, bias=True):
    return op.linear(inputs, dim, activation_fn=None, bias=bias)