def weighted_average(self, inputs, moving_params=None):
    """Embeds `inputs` as a weighted average of the trainable embedding rows."""
    input_shape = tf.shape(inputs)
    batch_size = input_shape[0]
    bucket_size = input_shape[1]
    input_size = len(self)

    if moving_params is not None:
        trainable_embeddings = moving_params.average(self.trainable_embeddings)
    else:
        trainable_embeddings = self.trainable_embeddings

    embed_input = tf.matmul(tf.reshape(inputs, [-1, input_size]),
                            trainable_embeddings)
    embed_input = tf.reshape(
        embed_input, tf.stack([batch_size, bucket_size, self.embed_size]))
    embed_input.set_shape([
        tf.Dimension(None),
        tf.Dimension(None),
        tf.Dimension(self.embed_size)
    ])
    if moving_params is None:
        tf.add_to_collection('Weights', embed_input)
    return embed_input
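# A minimal NumPy sketch (not part of the model code) of what
# `weighted_average` computes: multiplying a row-stochastic matrix by the
# embedding table yields, for each token, the probability-weighted average
# of the embedding rows. All names below are illustrative.
import numpy as np

vocab_size, embed_size = 4, 3
embeddings = np.random.randn(vocab_size, embed_size)
probs = np.array([[0.7, 0.3, 0.0, 0.0],
                  [0.0, 0.0, 0.5, 0.5]])          # 2 tokens, soft over vocab
weighted = probs @ embeddings                     # (2, embed_size)
assert np.allclose(weighted[0], 0.7 * embeddings[0] + 0.3 * embeddings[1])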
def linear_classifier(self, inputs, n_classes, add_bias=True, keep_prob=None):
    """Projects `inputs` onto `n_classes` logits with a zero-initialized linear layer."""
    n_dims = len(inputs.get_shape().as_list())
    batch_size = tf.shape(inputs)[0]
    bucket_size = tf.shape(inputs)[1]
    input_size = inputs.get_shape().as_list()[-1]
    output_size = n_classes
    output_shape = tf.stack([batch_size] + [bucket_size] * (n_dims - 2) +
                            [output_size])

    if self.moving_params is None:
        if keep_prob is None:
            keep_prob = self.mlp_keep_prob
    else:
        keep_prob = 1
    if isinstance(keep_prob, tf.Tensor) or keep_prob < 1:
        # Share one dropout mask across all time steps.
        noise_shape = tf.stack([batch_size] + [1] * (n_dims - 2) + [input_size])
        inputs = tf.nn.dropout(inputs, keep_prob, noise_shape=noise_shape)

    inputs = tf.reshape(inputs, [-1, input_size])
    output = linalg.linear(inputs,
                           output_size,
                           add_bias=add_bias,
                           initializer=tf.zeros_initializer,
                           moving_params=self.moving_params)
    output = tf.reshape(output, output_shape)
    output.set_shape([tf.Dimension(None)] * (n_dims - 1) +
                     [tf.Dimension(output_size)])
    return output
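# Illustrative NumPy sketch of the `noise_shape` trick used above: sampling
# one dropout mask of shape (batch, 1, input_size) and broadcasting it over
# the time axis drops the *same* units at every time step. Names below are
# hypothetical, chosen for the example.
import numpy as np

batch, time, size, keep_prob = 2, 5, 4, 0.5
rng = np.random.default_rng(0)
mask = (rng.random((batch, 1, size)) < keep_prob) / keep_prob  # inverted dropout
x = rng.standard_normal((batch, time, size))
dropped = x * mask                        # mask broadcasts across `time`
zero_pattern = (dropped == 0)
# The zero pattern at every time step matches the pattern at step 0:
assert (zero_pattern == zero_pattern[:, :1, :]).all()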
def conditional_bilinear_classifier(self, inputs1, inputs2, n_classes, probs,
                                    add_bias1=True, add_bias2=True):
    """Computes bilinear label scores and weights them by head probabilities `probs`."""
    input_shape = tf.shape(inputs1)
    batch_size = input_shape[0]
    bucket_size = input_shape[1]
    input_size = inputs1.get_shape().as_list()[-1]
    input_shape_to_set = [
        tf.Dimension(None), tf.Dimension(None), input_size + 1
    ]
    output_shape = tf.stack([batch_size, bucket_size, n_classes, bucket_size])
    if len(probs.get_shape().as_list()) == 2:
        # Gold heads are given as indices; convert them to one-hot distributions.
        probs = tf.to_float(tf.one_hot(tf.to_int64(probs), bucket_size, 1, 0))
    else:
        probs = tf.stop_gradient(probs)

    if self.moving_params is None:
        keep_prob = self.mlp_keep_prob
    else:
        keep_prob = 1
    if isinstance(keep_prob, tf.Tensor) or keep_prob < 1:
        noise_shape = tf.stack([batch_size, 1, input_size])
        inputs1 = tf.nn.dropout(inputs1, keep_prob, noise_shape=noise_shape)
        inputs2 = tf.nn.dropout(inputs2, keep_prob, noise_shape=noise_shape)

    inputs1 = tf.concat(
        2, [inputs1, tf.ones(tf.stack([batch_size, bucket_size, 1]))])
    inputs1.set_shape(input_shape_to_set)
    inputs2 = tf.concat(
        2, [inputs2, tf.ones(tf.stack([batch_size, bucket_size, 1]))])
    inputs2.set_shape(input_shape_to_set)

    bilin = linalg.bilinear(inputs1, inputs2, n_classes,
                            add_bias1=add_bias1,
                            add_bias2=add_bias2,
                            initializer=tf.zeros_initializer,
                            moving_params=self.moving_params)
    weighted_bilin = tf.batch_matmul(bilin, tf.expand_dims(probs, 3))
    return weighted_bilin, bilin
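# Illustrative NumPy sketch (names hypothetical) of the final weighting step:
# `bilin[b, i, c, j]` scores class `c` for dependent `i` with head `j`, so
# multiplying by the head distribution marginalizes the head out, leaving
# expected per-class scores for each dependent.
import numpy as np

batch, bucket, n_classes = 2, 5, 3
rng = np.random.default_rng(0)
bilin = rng.standard_normal((batch, bucket, n_classes, bucket))
probs = rng.random((batch, bucket, bucket))
probs /= probs.sum(-1, keepdims=True)             # rows are head distributions
weighted = np.einsum('bicj,bij->bic', bilin, probs)
assert weighted.shape == (batch, bucket, n_classes)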
def test_convert_to_tensor_dimension_list(self):
    convert_to_tensor = numpy_backend.convert_to_tensor
    shape = tf.TensorShape((1, 2))
    tensor_shape = convert_to_tensor(shape)
    for dim in tensor_shape:
        self.assertNotIsInstance(dim, tf1.Dimension)

    shape = [tf1.Dimension(1), tf1.Dimension(2)]
    tensor_shape = convert_to_tensor(shape)
    for dim in tensor_shape:
        self.assertNotIsInstance(dim, tf1.Dimension)
def MLP(self, inputs, output_size, func=None, keep_prob=None, n_splits=1):
    """One-layer MLP with optional gated activations and multiple output splits."""
    n_dims = len(inputs.get_shape().as_list())
    batch_size = tf.shape(inputs)[0]
    bucket_size = tf.shape(inputs)[1]
    input_size = inputs.get_shape().as_list()[-1]
    output_shape = tf.stack([batch_size] + [bucket_size] * (n_dims - 2) +
                            [output_size])
    shape_to_set = [tf.Dimension(None)] * (n_dims - 1) + [
        tf.Dimension(output_size)
    ]

    if func is None:
        func = self.mlp_func

    if self.moving_params is None:
        if keep_prob is None:
            keep_prob = self.mlp_keep_prob
    else:
        keep_prob = 1
    if keep_prob < 1:
        noise_shape = tf.stack([batch_size] + [1] * (n_dims - 2) + [input_size])
        inputs = tf.nn.dropout(inputs, keep_prob, noise_shape=noise_shape)

    # Gated activations consume twice the width, so request twice the splits.
    linear = linalg.linear(
        inputs,
        output_size,
        n_splits=n_splits * (1 + (func.__name__ in ('gated_tanh', 'gated_identity'))),
        add_bias=True,
        moving_params=self.moving_params)
    if func.__name__ in ('gated_tanh', 'gated_identity'):
        # Pair up value and gate halves so `func` can split them back apart.
        linear = [
            tf.concat(n_dims - 1, [lin1, lin2])
            for lin1, lin2 in zip(linear[:len(linear) // 2],
                                  linear[len(linear) // 2:])
        ]
    if n_splits == 1:
        linear = [linear]
    for i, split in enumerate(linear):
        split = func(split)
        split.set_shape(shape_to_set)
        linear[i] = split
    if n_splits == 1:
        return linear[0]
    else:
        return linear
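# The gated activations referenced above are defined elsewhere in the
# codebase; this is a plausible NumPy sketch, assuming the conventional
# value * gate form, of why `MLP` requests twice as many linear outputs and
# concatenates them before applying `func`:
import numpy as np

def gated_tanh(x):
    # Split the concatenated pre-activation into value and gate halves.
    value, gate = np.split(x, 2, axis=-1)
    return np.tanh(value) * (1.0 / (1.0 + np.exp(-gate)))

pre_activation = np.random.randn(2, 5, 2 * 8)     # twice the output width
hidden = gated_tanh(pre_activation)
assert hidden.shape == (2, 5, 8)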
def double_MLP(self, inputs, n_splits=1):
    """Applies two linear maps and broadcasts their sum over all token pairs."""
    batch_size = tf.shape(inputs)[0]
    bucket_size = tf.shape(inputs)[1]
    input_size = inputs.get_shape().as_list()[-1]
    output_size = self.attn_mlp_size
    output_shape = tf.stack([batch_size, bucket_size, bucket_size, output_size])
    shape_to_set = [
        tf.Dimension(None),
        tf.Dimension(None),
        tf.Dimension(None),
        tf.Dimension(output_size)
    ]

    if self.moving_params is None:
        keep_prob = self.mlp_keep_prob
    else:
        keep_prob = 1
    if isinstance(keep_prob, tf.Tensor) or keep_prob < 1:
        noise_shape = tf.stack([batch_size, 1, input_size])
        inputs = tf.nn.dropout(inputs, keep_prob, noise_shape=noise_shape)

    lin1, lin2 = linalg.linear(inputs,
                               output_size * n_splits,
                               n_splits=2,
                               add_bias=True,
                               moving_params=self.moving_params)
    # Broadcast lin1 over columns and lin2 over rows to get all pairwise sums.
    lin1 = tf.reshape(tf.transpose(lin1, [0, 2, 1]),
                      tf.stack([-1, bucket_size, 1]))
    lin2 = tf.reshape(tf.transpose(lin2, [0, 2, 1]),
                      tf.stack([-1, 1, bucket_size]))
    lin = lin1 + lin2
    lin = tf.reshape(
        lin,
        tf.stack([batch_size, n_splits * output_size, bucket_size, bucket_size]))
    lin = tf.transpose(lin, [0, 2, 3, 1])
    top_mlps = tf.split(3, n_splits, self.mlp_func(lin))
    for top_mlp in top_mlps:
        top_mlp.set_shape(shape_to_set)
    if n_splits == 1:
        return top_mlps[0]
    else:
        return top_mlps
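# Illustrative NumPy sketch of the broadcasting trick in `double_MLP`:
# reshaping one projection to (..., bucket, 1) and the other to
# (..., 1, bucket) makes their sum a (bucket, bucket) grid containing
# lin1[i] + lin2[j] for every token pair. All names are illustrative.
import numpy as np

bucket, size = 4, 3
lin1 = np.random.randn(bucket, size)
lin2 = np.random.randn(bucket, size)
pairwise = lin1[:, None, :] + lin2[None, :, :]    # (bucket, bucket, size)
assert np.allclose(pairwise[1, 2], lin1[1] + lin2[2])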
def broadcast_sub(inputs1, inputs2):
    """Feature-wise difference between every token of `inputs1` and `inputs2`."""
    inputs1_shape = tf.shape(inputs1)
    inputs_size = inputs1.get_shape().as_list()[-1]
    inputs2_shape = tf.shape(inputs2)
    inputs1 = tf.transpose(inputs1, [0, 2, 1])
    inputs2 = tf.transpose(inputs2, [0, 2, 1])
    inputs1 = tf.reshape(inputs1, tf.stack([-1, inputs1_shape[1], 1]))
    inputs2 = tf.reshape(inputs2, tf.stack([-1, 1, inputs2_shape[1]]))
    inputs = inputs1 - inputs2
    inputs = tf.reshape(inputs, [
        inputs1_shape[0], inputs1_shape[2], inputs1_shape[1], inputs2_shape[1]
    ])
    inputs = tf.transpose(inputs, [0, 2, 3, 1])
    inputs.set_shape([tf.Dimension(None)] * 3 + [tf.Dimension(inputs_size)])
    return inputs
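# Illustrative NumPy sketch of what `broadcast_sub` returns:
# out[b, i, j, k] = inputs1[b, i, k] - inputs2[b, j, k], i.e. a feature-wise
# difference for every (i, j) token pair.
import numpy as np

batch, bucket, size = 2, 4, 3
x1 = np.random.randn(batch, bucket, size)
x2 = np.random.randn(batch, bucket, size)
out = x1[:, :, None, :] - x2[:, None, :, :]       # (batch, bucket, bucket, size)
assert np.allclose(out[0, 1, 2], x1[0, 1] - x2[0, 2])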
def linear(self, inputs, output_size, n_splits=1, add_bias=False):
    """Linear layer with input dropout; optionally splits the output `n_splits` ways."""
    n_dims = len(inputs.get_shape().as_list())
    batch_size = tf.shape(inputs)[0]
    bucket_size = tf.shape(inputs)[1]
    input_size = inputs.get_shape().as_list()[-1]
    output_shape = tf.stack([batch_size] + [bucket_size] * (n_dims - 2) +
                            [output_size])
    shape_to_set = [tf.Dimension(None)] * (n_dims - 1) + [
        tf.Dimension(output_size)
    ]

    if self.moving_params is None:
        keep_prob = self.info_keep_prob
    else:
        keep_prob = 1
    if keep_prob < 1:
        noise_shape = tf.stack([batch_size] + [1] * (n_dims - 2) + [input_size])
        inputs = tf.nn.dropout(inputs, keep_prob, noise_shape=noise_shape)

    lin = linalg.linear(inputs,
                        output_size,
                        n_splits=n_splits,
                        add_bias=add_bias,
                        moving_params=self.moving_params)
    if n_splits == 1:
        lin = [lin]
    for i, split in enumerate(lin):
        split.set_shape(shape_to_set)
    if n_splits == 1:
        return lin[0]
    else:
        return lin
def soft_attn(self, top_recur):
    """Concatenates each recurrent state with a soft-attention average of likely head states."""
    reuse = (self.moving_params is not None) or None

    input_size = top_recur.get_shape().as_list()[-1]
    with tf.variable_scope('MLP', reuse=reuse):
        head_mlp, dep_mlp = self.MLP(top_recur,
                                     self.info_mlp_size,
                                     func=self.info_func,
                                     keep_prob=self.info_keep_prob,
                                     n_splits=2)
    with tf.variable_scope('Arcs', reuse=reuse):
        arc_logits = self.bilinear_classifier(dep_mlp,
                                              head_mlp,
                                              keep_prob=self.info_keep_prob)
        arc_prob = self.softmax(arc_logits)
        head_lin = tf.batch_matmul(arc_prob, top_recur)
        top_recur = tf.concat(2, [top_recur, head_lin])
    top_recur.set_shape([
        tf.Dimension(None),
        tf.Dimension(None),
        tf.Dimension(4 * self.recur_size)
    ])
    return top_recur
def RNN(self, inputs):
    """Runs the configured (bi)directional RNN over `inputs`."""
    input_size = inputs.get_shape().as_list()[-1]
    cell = self.recur_cell(self._config,
                           input_size=input_size,
                           moving_params=self.moving_params)
    lengths = tf.reshape(tf.to_int64(self.sequence_lengths), [-1])

    if self.moving_params is None:
        ff_keep_prob = self.ff_keep_prob
        recur_keep_prob = self.recur_keep_prob
    else:
        ff_keep_prob = 1
        recur_keep_prob = 1

    if self.recur_bidir:
        top_recur, fw_recur, bw_recur = rnn.dynamic_bidirectional_rnn(
            cell, cell, inputs, lengths,
            ff_keep_prob=ff_keep_prob,
            recur_keep_prob=recur_keep_prob,
            dtype=tf.float32)
        fw_cell, fw_out = tf.split(1, 2, fw_recur)
        bw_cell, bw_out = tf.split(1, 2, bw_recur)
        end_recur = tf.concat(1, [fw_out, bw_out])
        top_recur.set_shape([
            tf.Dimension(None),
            tf.Dimension(None),
            tf.Dimension(2 * self.recur_size)
        ])
    else:
        top_recur, end_recur = rnn.dynamic_rnn(
            cell, inputs, lengths,
            ff_keep_prob=ff_keep_prob,
            recur_keep_prob=recur_keep_prob,
            dtype=tf.float32)
        top_recur.set_shape([
            tf.Dimension(None),
            tf.Dimension(None),
            tf.Dimension(self.recur_size)
        ])
    return top_recur, end_recur
def test_convert_to_tensor_dimension(self):
    convert_to_tensor = numpy_backend.convert_to_tensor
    shape = tf1.Dimension(1)
    tensor_shape = convert_to_tensor(shape)
    self.assertNotIsInstance(tensor_shape, tf1.Dimension)
def test_convert_dimension_to_tensor(self):
    v = ps.constant(tf1.Dimension(1))
    self.assertEqual(1, v)
def linear(inputs, output_size, add_bias=True, n_splits=1,
           initializer=None, scope=None, moving_params=None):
    """Affine transformation of `inputs`, optionally split `n_splits` ways along the last axis."""
    if not isinstance(inputs, (list, tuple)):
        inputs = [inputs]
    output_size *= n_splits

    with tf.variable_scope(scope or 'Linear'):
        # Reformat the input
        total_input_size = 0
        shapes = [a.get_shape().as_list() for a in inputs]
        for shape in shapes:
            total_input_size += shape[-1]
        input_shape = tf.shape(inputs[0])
        output_shape = []
        for i in xrange(len(shapes[0])):
            output_shape.append(input_shape[i])
        output_shape[-1] = output_size
        output_shape = tf.stack(output_shape)
        for i, (input_, shape) in enumerate(zip(inputs, shapes)):
            inputs[i] = tf.reshape(input_, [-1, shape[-1]])
        concatenation = tf.concat(1, inputs)

        # Get the matrix
        if initializer is None and moving_params is None:
            mat = orthonormal_initializer(total_input_size,
                                          output_size // n_splits)
            mat = np.concatenate([mat] * n_splits, axis=1)
            initializer = tf.constant_initializer(mat)
        matrix = tf.get_variable('Weights', [total_input_size, output_size],
                                 initializer=initializer)
        if moving_params is not None:
            matrix = moving_params.average(matrix)
        else:
            tf.add_to_collection('Weights', matrix)

        # Get the bias
        if add_bias:
            bias = tf.get_variable('Biases', [output_size],
                                   initializer=tf.zeros_initializer)
            if moving_params is not None:
                bias = moving_params.average(bias)
        else:
            bias = 0

        # Do the multiplication
        new = tf.matmul(concatenation, matrix) + bias
        new = tf.reshape(new, output_shape)
        new.set_shape([tf.Dimension(None) for _ in xrange(len(shapes[0]) - 1)] +
                      [tf.Dimension(output_size)])
        if n_splits > 1:
            return tf.split(len(new.get_shape().as_list()) - 1, n_splits, new)
        else:
            return new
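# Illustrative NumPy sketch (names hypothetical) of the `n_splits` behavior
# above: one wide matmul whose result is split along the last axis is
# equivalent to separate projections with the corresponding weight slices,
# which is why all splits can share a single `Weights` variable.
import numpy as np

input_size, output_size, n_splits = 5, 3, 2
x = np.random.randn(7, input_size)
W = np.random.randn(input_size, output_size * n_splits)
split_a, split_b = np.split(x @ W, n_splits, axis=-1)
assert np.allclose(split_a, x @ W[:, :output_size])
assert np.allclose(split_b, x @ W[:, output_size:])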
def diagonal_bilinear(inputs1, inputs2, output_size,
                      add_bias2=True, add_bias1=True, add_bias=False,
                      initializer=None, scope=None, moving_params=None):
    """Diagonal bilinear form plus linear terms over all pairs of `inputs1`/`inputs2` tokens."""
    with tf.variable_scope(scope or 'Bilinear'):
        # Reformat the inputs
        ndims = len(inputs1.get_shape().as_list())
        inputs1_shape = tf.shape(inputs1)
        inputs2_shape = tf.shape(inputs2)
        inputs1_bucket_size = inputs1_shape[ndims - 2]
        inputs2_bucket_size = inputs2_shape[ndims - 2]

        inputs1_size = inputs1.get_shape().as_list()[-1]
        inputs2_size = inputs2.get_shape().as_list()[-1]
        assert inputs1_size == inputs2_size

        output_shape = []
        batch_size = 1
        for i in xrange(ndims - 2):
            batch_size *= inputs1_shape[i]
            output_shape.append(inputs1_shape[i])
        output_shape.append(inputs1_bucket_size)
        output_shape.append(output_size)
        output_shape.append(inputs2_bucket_size)
        output_shape = tf.stack(output_shape)
        inputs1 = tf.reshape(
            inputs1, tf.stack([batch_size, inputs1_bucket_size, inputs1_size]))
        inputs2 = tf.reshape(
            inputs2, tf.stack([batch_size, inputs2_bucket_size, inputs2_size]))
        inputs1.set_shape([tf.Dimension(None)] * 2 +
                          [tf.Dimension(inputs1_size)])
        inputs2.set_shape([tf.Dimension(None)] * 2 +
                          [tf.Dimension(inputs2_size)])
        inputs = broadcast_mult(inputs1, inputs2)
        with tf.variable_scope('Bilinear'):
            bilin = linear(inputs, output_size,
                           add_bias=add_bias,
                           initializer=initializer,
                           scope=scope,
                           moving_params=moving_params)
        with tf.variable_scope('Linear1'):
            lin1 = linear(inputs1, output_size,
                          add_bias=False,
                          initializer=initializer,
                          scope=scope,
                          moving_params=moving_params)
            lin1 = tf.expand_dims(lin1, 2)
        with tf.variable_scope('Linear2'):
            lin2 = linear(inputs2, output_size,
                          add_bias=False,
                          initializer=initializer,
                          scope=scope,
                          moving_params=moving_params)
            lin2 = tf.expand_dims(lin2, 1)
        bilin = tf.transpose(bilin + lin1 + lin2, [0, 1, 3, 2])

        return bilin
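# Illustrative NumPy sketch of the "diagonal" bilinear form built above:
# instead of a full (size x size) matrix per output class, only a diagonal
# is used, so the score reduces to a weighted elementwise product:
# score[b, i, c, j] = sum_k x1[b, i, k] * w[c, k] * x2[b, j, k].
import numpy as np

batch, bucket, size, n_classes = 2, 4, 3, 5
rng = np.random.default_rng(0)
x1 = rng.standard_normal((batch, bucket, size))
x2 = rng.standard_normal((batch, bucket, size))
w = rng.standard_normal((n_classes, size))        # one diagonal per class
scores = np.einsum('bik,ck,bjk->bicj', x1, w, x2)
# Same result via the elementwise-product route the code takes:
pairwise = x1[:, :, None, :] * x2[:, None, :, :]  # broadcast_mult analogue
assert np.allclose(scores, np.einsum('bijk,ck->bicj', pairwise, w))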
def shape(self):
    feature_shape = self.tensors[0].shape[1:]
    batch_size = sum([tensor.shape[0] for tensor in self.tensors],
                     tf.Dimension(0))
    return tf.TensorShape([batch_size]).concatenate(feature_shape)