def __call__(self, inputs, state, scope=None):
  """Runs one step of a GRU-style cell; `state` packs [cell; hidden] along axis 1."""

  with tf.variable_scope(scope or type(self).__name__):
    cell_tm1, hidden_tm1 = tf.split(axis=1, num_or_size_splits=2, value=state)
    with tf.variable_scope('Gates'):
      linear = linalg.linear([inputs, hidden_tm1],
                             self.output_size,
                             add_bias=True,
                             n_splits=2,
                             moving_params=self.moving_params)
      update_act, reset_act = linear
      update_gate = linalg.sigmoid(update_act - self.forget_bias)
      reset_gate = linalg.sigmoid(reset_act)
      reset_state = reset_gate * hidden_tm1
    with tf.variable_scope('Candidate'):
      hidden_act = linalg.linear([inputs, reset_state],
                                 self.output_size,
                                 add_bias=True,
                                 moving_params=self.moving_params)
      hidden_tilde = self.recur_func(hidden_act)
    cell_t = update_gate * cell_tm1 + (1 - update_gate) * hidden_tilde
    # The new cell value doubles as the hidden state, so the packed state
    # carries it twice.
    return cell_t, tf.concat(axis=1, values=[cell_t, cell_t])
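# A minimal usage sketch for cells with this interface (assumptions: the
# enclosing class exposes `output_size`, `state` packs the cell and hidden
# vectors along axis 1, and `CifGRUCell` is a hypothetical name for
# whichever class owns this method; sizes are illustrative):
#
#   cell = CifGRUCell(...)                                  # hypothetical
#   inputs = tf.placeholder(tf.float32, [None, 100])        # (batch, input)
#   state = tf.zeros([tf.shape(inputs)[0], 2 * cell.output_size])
#   output_t, state_t = cell(inputs, state)
#   # output_t: (batch, output_size)
#   # state_t:  (batch, 2 * output_size), i.e. [cell_t; cell_t] here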
def conditional_linear_classifier(self, inputs, n_classes, probs, add_bias=True):
  """Applies a linear classifier to 4-D inputs, then weights the per-pair
  class scores by `probs` (e.g. predicted or gold head attachments)."""

  input_shape = tf.shape(inputs)
  batch_size = input_shape[0]
  bucket_size = input_shape[1]
  input_size = inputs.get_shape().as_list()[-1]

  if len(probs.get_shape().as_list()) == 2:
    # Integer head indices: convert to one-hot distributions over the bucket.
    probs = tf.to_float(tf.one_hot(tf.to_int64(probs), bucket_size, 1, 0))
  else:
    # Soft distributions: don't backprop through them.
    probs = tf.stop_gradient(probs)

  if self.moving_params is None:
    keep_prob = self.mlp_keep_prob
  else:
    keep_prob = 1
  if isinstance(keep_prob, tf.Tensor) or keep_prob < 1:
    noise_shape = tf.stack([batch_size, 1, 1, input_size])
    inputs = tf.nn.dropout(inputs, keep_prob, noise_shape=noise_shape)

  lin = linalg.linear(inputs,
                      n_classes,
                      add_bias=add_bias,
                      initializer=tf.zeros_initializer(),
                      moving_params=self.moving_params)
  weighted_lin = tf.matmul(lin, tf.expand_dims(probs, 3), adjoint_a=True)
  return weighted_lin, lin
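# Shape walk-through for the weighting step above (a sketch; concrete sizes
# are illustrative). With 4-D inputs of shape (batch, t, t, d):
#
#   lin:                      (batch, t, t, n_classes)  # per token-pair scores
#   probs:                    (batch, t, t)             # rows: dependents, cols: heads
#   tf.expand_dims(probs, 3): (batch, t, t, 1)
#   adjoint_a transposes lin's last two axes to (batch, t, n_classes, t),
#   so the matmul yields:
#   weighted_lin:             (batch, t, n_classes, 1)
#
# i.e. each token's class scores are averaged over candidate heads, weighted
# by `probs` (exactly one head is selected when `probs` was one-hot).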
def linear_classifier(self, inputs, n_classes, add_bias=True, keep_prob=None):
  """Dropout plus a zero-initialized linear classifier over the last axis."""

  n_dims = len(inputs.get_shape().as_list())
  batch_size = tf.shape(inputs)[0]
  bucket_size = tf.shape(inputs)[1]
  input_size = inputs.get_shape().as_list()[-1]
  output_size = n_classes
  output_shape = tf.stack([batch_size] + [bucket_size] * (n_dims - 2) + [output_size])

  if self.moving_params is None:
    if keep_prob is None:
      keep_prob = self.mlp_keep_prob
  else:
    keep_prob = 1
  if isinstance(keep_prob, tf.Tensor) or keep_prob < 1:
    noise_shape = tf.stack([batch_size] + [1] * (n_dims - 2) + [input_size])
    inputs = tf.nn.dropout(inputs, keep_prob, noise_shape=noise_shape)

  # Flatten all but the feature axis, classify, then restore the shape.
  inputs = tf.reshape(inputs, [-1, input_size])
  output = linalg.linear(inputs,
                         output_size,
                         add_bias=add_bias,
                         initializer=tf.zeros_initializer(),
                         moving_params=self.moving_params)
  output = tf.reshape(output, output_shape)
  output.set_shape([tf.Dimension(None)] * (n_dims - 1) + [tf.Dimension(output_size)])
  return output
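# Usage sketch (assumptions: the enclosing model exposes `mlp_keep_prob`
# during training and a populated `moving_params` at eval time; sizes are
# illustrative, e.g. 45 POS tags):
#
#   # inputs: (batch, bucket, d) token representations
#   logits = self.linear_classifier(inputs, n_classes=45)
#   # logits: (batch, bucket, 45); the weights start at zero because of
#   # tf.zeros_initializer(), so initial predictions are uniform.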
def __call__(self, inputs, state, scope=None):
  """Runs one step of a coupled-gate LSTM variant; `state` packs [cell; hidden]."""

  if self.recur_diag_bilin:
    # Augment the input with an elementwise product of its two halves.
    inputs1, inputs2 = tf.split(axis=1, num_or_size_splits=2, value=inputs)
    inputs = tf.concat(axis=1, values=[inputs1 * inputs2, inputs1, inputs2])
  with tf.variable_scope(scope or type(self).__name__):
    cell_tm1, hidden_tm1 = tf.split(axis=1, num_or_size_splits=2, value=state)
    linear = linalg.linear([inputs, hidden_tm1],
                           self.output_size,
                           add_bias=True,
                           n_splits=3,
                           moving_params=self.moving_params)
    cell_act, update_act, output_act = linear

    cell_tilde_t = cell_act
    update_gate = linalg.sigmoid(update_act - self.forget_bias)
    output_gate = linalg.sigmoid(output_act)
    cell_t = update_gate * cell_tilde_t + (1 - update_gate) * cell_tm1
    hidden_tilde_t = self.recur_func(cell_t)
    hidden_t = hidden_tilde_t * output_gate

    # Stochastically keep the previous hidden/cell values (zoneout-style).
    if self.hidden_include_prob < 1 and self.moving_params is None:
      hidden_mask = tf.nn.dropout(tf.ones_like(hidden_t),
                                  self.hidden_include_prob) * self.hidden_include_prob
      hidden_t = hidden_mask * hidden_t + (1 - hidden_mask) * hidden_tm1
    if self.cell_include_prob < 1 and self.moving_params is None:
      cell_mask = tf.nn.dropout(tf.ones_like(cell_t),
                                self.cell_include_prob) * self.cell_include_prob
      cell_t = cell_mask * cell_t + (1 - cell_mask) * cell_tm1

    return hidden_t, tf.concat(axis=1, values=[cell_t, hidden_t])
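# Why tf.nn.dropout(tf.ones_like(x), p) * p yields a binary mask: dropout
# zeroes each unit with probability 1 - p and scales survivors by 1 / p,
# so on a tensor of ones the result takes values in {0, 1/p}; multiplying
# by p recovers an exact {0, 1} mask. A quick numeric check (sketch):
#
#   ones = tf.ones([4])
#   mask = tf.nn.dropout(ones, 0.5) * 0.5   # e.g. [1., 0., 0., 1.]
#
# Blending the new and previous states through this mask is the
# zoneout-style regularization used above for both the hidden and cell
# vectors.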
def double_MLP(self, inputs, n_splits=1):
  """Maps each ordered pair of tokens to an MLP representation of shape
  (batch, bucket, bucket, mlp_size), collecting orthogonality and
  covariance regularizers during training."""

  batch_size = tf.shape(inputs)[0]
  bucket_size = tf.shape(inputs)[1]
  input_size = inputs.get_shape().as_list()[-1]
  output_size = self.mlp_size
  output_shape = tf.stack([batch_size, bucket_size, bucket_size, output_size])
  shape_to_set = [tf.Dimension(None),
                  tf.Dimension(None),
                  tf.Dimension(None),
                  tf.Dimension(output_size)]

  if self.moving_params is None:
    if self.drop_gradually:
      # Anneal dropout: interpolate between mlp_keep_prob and 1.
      s = self.global_sigmoid
      keep_prob = s + (1 - s) * self.mlp_keep_prob
    else:
      keep_prob = self.mlp_keep_prob
  else:
    keep_prob = 1
  if isinstance(keep_prob, tf.Tensor) or keep_prob < 1:
    noise_shape = tf.stack([batch_size, 1, input_size])
    inputs = tf.nn.dropout(inputs, keep_prob, noise_shape=noise_shape)

  lin1, lin2 = linalg.linear(inputs,
                             output_size * n_splits,
                             n_splits=2,
                             add_bias=True,
                             moving_params=self.moving_params)
  # Broadcast-add the two per-token projections over ordered token pairs.
  lin1 = tf.reshape(tf.transpose(lin1, [0, 2, 1]), tf.stack([-1, bucket_size, 1]))
  lin2 = tf.reshape(tf.transpose(lin2, [0, 2, 1]), tf.stack([-1, 1, bucket_size]))
  lin = lin1 + lin2
  lin = tf.reshape(lin, tf.stack([batch_size, n_splits * output_size,
                                  bucket_size, bucket_size]))
  lin = tf.transpose(lin, [0, 2, 3, 1])
  top_mlps = tf.split(axis=3, num_or_size_splits=n_splits, value=self.mlp_func(lin))
  for top_mlp in top_mlps:
    top_mlp.set_shape(shape_to_set)

  if self.moving_params is None:
    with tf.variable_scope('Linear', reuse=True):
      matrix = tf.get_variable('Weights')
      I = tf.diag(tf.ones([self.mlp_size]))
      for W in tf.split(axis=1, num_or_size_splits=2 * n_splits, value=matrix):
        WTWmI = tf.matmul(W, W, transpose_a=True) - I
        tf.add_to_collection('ortho_losses', tf.nn.l2_loss(WTWmI))
    for split in top_mlps:
      tf.add_to_collection('covar_losses', self.covar_loss(split))

  if n_splits == 1:
    return top_mlps[0]
  else:
    return top_mlps
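# The orthogonality penalty above pushes each weight block W toward
# satisfying W^T W = I. A standalone sketch of the same computation
# (`ortho_penalty` is a hypothetical helper; `size` is the block's column
# dimension):
#
#   def ortho_penalty(W, size):
#     """||W^T W - I||^2 / 2, as tf.nn.l2_loss computes it."""
#     I = tf.diag(tf.ones([size]))
#     WTWmI = tf.matmul(W, W, transpose_a=True) - I
#     return tf.nn.l2_loss(WTWmI)
#
# Summing this over the per-split blocks of the shared weight matrix, as
# done above, keeps the learned feature directions approximately
# decorrelated.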
def __call__(self, inputs, state, scope=None):
  """Runs one LSTM step; gate biases live in their own variable so the cell
  candidate stays bias-free. `state` packs [cell; hidden] along axis 1."""

  with tf.variable_scope(scope or type(self).__name__):
    cell_tm1, hidden_tm1 = tf.split(axis=1, num_or_size_splits=2, value=state)
    linear = linalg.linear([inputs, hidden_tm1],
                           self.output_size,
                           add_bias=False,
                           n_splits=4,
                           moving_params=self.moving_params)
    with tf.variable_scope('Linear'):
      biases = tf.get_variable('Biases', [3 * self.output_size],
                               initializer=tf.zeros_initializer())
      biases = tf.split(axis=0, num_or_size_splits=3, value=biases)
    cell_act, input_act, forget_act, output_act = linear
    input_bias, forget_bias, output_bias = biases

    cell_tilde_t = linalg.tanh(cell_act)
    input_gate = linalg.sigmoid(input_act + input_bias)
    forget_gate = linalg.sigmoid(forget_act + forget_bias - self.forget_bias)
    output_gate = linalg.sigmoid(output_act + output_bias)
    # Note the coupling: (1 - forget_gate) scales the previous cell.
    cell_t = input_gate * cell_tilde_t + (1 - forget_gate) * cell_tm1
    hidden_tilde_t = self.recur_func(cell_t)
    hidden_t = hidden_tilde_t * output_gate

    return hidden_t, tf.concat(axis=1, values=[cell_t, hidden_t])
def double_MLP(self, inputs, n_splits=1):
  """Maps each ordered pair of tokens to an MLP representation of shape
  (batch, bucket, bucket, attn_mlp_size)."""

  batch_size = tf.shape(inputs)[0]
  bucket_size = tf.shape(inputs)[1]
  input_size = inputs.get_shape().as_list()[-1]
  output_size = self.attn_mlp_size
  output_shape = tf.stack([batch_size, bucket_size, bucket_size, output_size])
  shape_to_set = [tf.Dimension(None),
                  tf.Dimension(None),
                  tf.Dimension(None),
                  tf.Dimension(output_size)]

  if self.moving_params is None:
    keep_prob = self.mlp_keep_prob
  else:
    keep_prob = 1
  if isinstance(keep_prob, tf.Tensor) or keep_prob < 1:
    noise_shape = tf.stack([batch_size, 1, input_size])
    inputs = tf.nn.dropout(inputs, keep_prob, noise_shape=noise_shape)

  lin1, lin2 = linalg.linear(inputs,
                             output_size * n_splits,
                             n_splits=2,
                             add_bias=True,
                             moving_params=self.moving_params)
  # Broadcast-add the two per-token projections over ordered token pairs.
  lin1 = tf.reshape(tf.transpose(lin1, [0, 2, 1]), tf.stack([-1, bucket_size, 1]))
  lin2 = tf.reshape(tf.transpose(lin2, [0, 2, 1]), tf.stack([-1, 1, bucket_size]))
  lin = lin1 + lin2
  lin = tf.reshape(lin, tf.stack([batch_size, n_splits * output_size,
                                  bucket_size, bucket_size]))
  lin = tf.transpose(lin, [0, 2, 3, 1])
  top_mlps = tf.split(axis=3, num_or_size_splits=n_splits, value=self.mlp_func(lin))
  for top_mlp in top_mlps:
    top_mlp.set_shape(shape_to_set)

  if n_splits == 1:
    return top_mlps[0]
  else:
    return top_mlps
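# How the pairwise tensor is assembled above (a sketch; sizes are
# illustrative): two per-token projections are broadcast-added so that
# entry (b, i, j) mixes token i from the first projection with token j
# from the second, yielding one vector per ordered token pair.
#
#   lin1: (batch, t, d) -> transposed/reshaped to (batch * d, t, 1)
#   lin2: (batch, t, d) -> transposed/reshaped to (batch * d, 1, t)
#   lin1 + lin2:           (batch * d, t, t)   # broadcast over both axes
#   final reshape/transpose: (batch, t, t, d)
#
# This additive mixing is the analogue of a bilinear attention score and
# feeds the nonlinearity `self.mlp_func`.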
def linear(self, inputs, output_size, n_splits=1, add_bias=False):
  """Dropout followed by a (possibly split) linear transformation."""

  n_dims = len(inputs.get_shape().as_list())
  batch_size = tf.shape(inputs)[0]
  bucket_size = tf.shape(inputs)[1]
  input_size = inputs.get_shape().as_list()[-1]
  output_shape = tf.stack([batch_size] + [bucket_size] * (n_dims - 2) + [output_size])
  shape_to_set = [tf.Dimension(None)] * (n_dims - 1) + [tf.Dimension(output_size)]

  if self.moving_params is None:
    keep_prob = self.info_keep_prob
  else:
    keep_prob = 1
  if isinstance(keep_prob, tf.Tensor) or keep_prob < 1:
    noise_shape = tf.stack([batch_size] + [1] * (n_dims - 2) + [input_size])
    inputs = tf.nn.dropout(inputs, keep_prob, noise_shape=noise_shape)

  lin = linalg.linear(inputs,
                      output_size,
                      n_splits=n_splits,
                      add_bias=add_bias,
                      moving_params=self.moving_params)
  if n_splits == 1:
    lin = [lin]
  for i, split in enumerate(lin):
    split.set_shape(shape_to_set)
  if n_splits == 1:
    return lin[0]
  else:
    return lin
def __call__(self, inputs, state, scope=None):
  """Runs one step of a vanilla RNN; output and state are the same tensor."""

  with tf.variable_scope(scope or type(self).__name__):
    hidden_act = linalg.linear([inputs, state],
                               self.output_size,
                               add_bias=False,
                               moving_params=self.moving_params)
    hidden = self.recur_func(hidden_act)
    return hidden, hidden
def __call__(self, inputs, state, scope=None):
  """Vanilla RNN step with an optional diagonal-bilinear input transform."""

  if self.recur_diag_bilin:
    # Augment the input with an elementwise product of its two halves.
    inputs1, inputs2 = tf.split(axis=1, num_or_size_splits=2, value=inputs)
    inputs = tf.concat(axis=1, values=[inputs1 * inputs2, inputs1, inputs2])
  with tf.variable_scope(scope or type(self).__name__):
    hidden_act = linalg.linear([inputs, state],
                               self.output_size,
                               add_bias=False,
                               moving_params=self.moving_params)
    hidden = self.recur_func(hidden_act)
    return hidden, hidden
def __call__(self, inputs, state, scope=None):
  """Runs one GRU step; output and state are the same tensor."""

  with tf.variable_scope(scope or type(self).__name__):
    with tf.variable_scope('Gates'):
      linear = linalg.linear([inputs, state],
                             self.output_size,
                             add_bias=True,
                             n_splits=2,
                             moving_params=self.moving_params)
      update_act, reset_act = linear
      update_gate = linalg.sigmoid(update_act - self.forget_bias)
      reset_gate = linalg.sigmoid(reset_act)
      reset_state = reset_gate * state
    with tf.variable_scope('Candidate'):
      hidden_act = linalg.linear([inputs, reset_state],
                                 self.output_size,
                                 add_bias=False,
                                 moving_params=self.moving_params)
      hidden_tilde = self.recur_func(hidden_act)
    hidden = update_gate * state + (1 - update_gate) * hidden_tilde
    return hidden, hidden
def MLP(self, inputs, n_splits=1):
  """Dropout, a linear map, and the MLP nonlinearity, collecting
  orthogonality and covariance regularizers during training."""

  n_dims = len(inputs.get_shape().as_list())
  batch_size = tf.shape(inputs)[0]
  bucket_size = tf.shape(inputs)[1]
  input_size = inputs.get_shape().as_list()[-1]
  output_size = self.mlp_size
  output_shape = tf.stack([batch_size] + [bucket_size] * (n_dims - 2) + [output_size])
  shape_to_set = [tf.Dimension(None)] * (n_dims - 1) + [tf.Dimension(output_size)]

  if self.moving_params is None:
    if self.drop_gradually:
      # Anneal dropout: interpolate between mlp_keep_prob and 1.
      s = self.global_sigmoid
      keep_prob = s + (1 - s) * self.mlp_keep_prob
    else:
      keep_prob = self.mlp_keep_prob
  else:
    keep_prob = 1
  if isinstance(keep_prob, tf.Tensor) or keep_prob < 1:
    noise_shape = tf.stack([batch_size] + [1] * (n_dims - 2) + [input_size])
    inputs = tf.nn.dropout(inputs, keep_prob, noise_shape=noise_shape)

  linear = linalg.linear(inputs,
                         output_size,
                         n_splits=n_splits,
                         add_bias=True,
                         moving_params=self.moving_params)
  if n_splits == 1:
    linear = [linear]
  for i, split in enumerate(linear):
    split = self.mlp_func(split)
    split.set_shape(shape_to_set)
    linear[i] = split

  if self.moving_params is None:
    with tf.variable_scope('Linear', reuse=True):
      matrix = tf.get_variable('Weights')
      I = tf.diag(tf.ones([self.mlp_size]))
      for W in tf.split(axis=1, num_or_size_splits=n_splits, value=matrix):
        WTWmI = tf.matmul(W, W, transpose_a=True) - I
        tf.add_to_collection('ortho_losses', tf.nn.l2_loss(WTWmI))
    for split in linear:
      tf.add_to_collection('covar_losses', self.covar_loss(split))

  if n_splits == 1:
    return linear[0]
  else:
    return linear
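# The `drop_gradually` branch schedules dropout over training: as
# s = self.global_sigmoid moves between 0 and 1, the effective keep
# probability interpolates between mlp_keep_prob and 1 (which direction
# depends on how global_sigmoid is scheduled in the rest of the codebase).
# A numeric sketch with illustrative values:
#
#   mlp_keep_prob = 0.67
#   for s in (0.0, 0.5, 1.0):
#     keep_prob = s + (1 - s) * mlp_keep_prob
#     # s=0.0 -> 0.67  (full dropout strength)
#     # s=0.5 -> 0.835
#     # s=1.0 -> 1.0   (dropout off)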
def __call__(self, inputs, state, scope=None):
  """LSTM step with separately stored gate biases and an optional
  diagonal-bilinear input transform. `state` packs [cell; hidden] along axis 1."""

  with tf.variable_scope(scope or type(self).__name__):
    cell_tm1, hidden_tm1 = tf.split(axis=1, num_or_size_splits=2, value=state)
    if self.recur_diag_bilin:
      # Mix the two input halves multiplicatively and additively.
      inputs1, inputs2 = tf.split(axis=1, num_or_size_splits=2, value=inputs)
      input_list = [inputs1 * inputs2, inputs1 + inputs2, hidden_tm1]
    else:
      input_list = [inputs, hidden_tm1]
    linear = linalg.linear(input_list,
                           self.output_size,
                           add_bias=False,
                           n_splits=4,
                           moving_params=self.moving_params)
    with tf.variable_scope('Linear'):
      biases = tf.get_variable('Biases', [4 * self.output_size],
                               initializer=tf.zeros_initializer())
      biases = tf.split(axis=0, num_or_size_splits=4, value=biases)
    cell_act, input_act, forget_act, output_act = linear
    cell_bias, input_bias, forget_bias, output_bias = biases

    cell_tilde_t = linalg.tanh(cell_act + cell_bias)
    input_gate = linalg.sigmoid(input_act + input_bias)
    forget_gate = linalg.sigmoid(forget_act + forget_bias - self.forget_bias)
    output_gate = linalg.sigmoid(output_act + output_bias)
    cell_t = input_gate * cell_tilde_t + (1 - forget_gate) * cell_tm1
    hidden_tilde_t = self.recur_func(cell_t)
    hidden_t = hidden_tilde_t * output_gate

    # Stochastically keep the previous hidden/cell values (zoneout-style).
    if self.hidden_include_prob < 1 and self.moving_params is None:
      hidden_mask = tf.nn.dropout(tf.ones_like(hidden_t),
                                  self.hidden_include_prob) * self.hidden_include_prob
      hidden_t = hidden_mask * hidden_t + (1 - hidden_mask) * hidden_tm1
    if self.cell_include_prob < 1 and self.moving_params is None:
      cell_mask = tf.nn.dropout(tf.ones_like(cell_t),
                                self.cell_include_prob) * self.cell_include_prob
      cell_t = cell_mask * cell_t + (1 - cell_mask) * cell_tm1

    return hidden_t, tf.concat(axis=1, values=[cell_t, hidden_t])
def MLP(self, inputs, output_size, func=None, keep_prob=None, n_splits=1):
  """Dropout, a linear map, and a nonlinearity; gated activations receive a
  doubled linear output that the activation splits internally."""

  n_dims = len(inputs.get_shape().as_list())
  batch_size = tf.shape(inputs)[0]
  bucket_size = tf.shape(inputs)[1]
  input_size = inputs.get_shape().as_list()[-1]
  output_shape = tf.stack([batch_size] + [bucket_size] * (n_dims - 2) + [output_size])
  shape_to_set = [tf.Dimension(None)] * (n_dims - 1) + [tf.Dimension(output_size)]

  if func is None:
    func = self.mlp_func
  if self.moving_params is None:
    if keep_prob is None:
      keep_prob = self.mlp_keep_prob
  else:
    keep_prob = 1
  if isinstance(keep_prob, tf.Tensor) or keep_prob < 1:
    noise_shape = tf.stack([batch_size] + [1] * (n_dims - 2) + [input_size])
    inputs = tf.nn.dropout(inputs, keep_prob, noise_shape=noise_shape)

  # Gated activations consume two linear halves, so double the splits.
  linear = linalg.linear(
      inputs,
      output_size,
      n_splits=n_splits * (1 + (func.__name__ in ('gated_tanh', 'gated_identity'))),
      add_bias=True,
      moving_params=self.moving_params)
  if func.__name__ in ('gated_tanh', 'gated_identity'):
    linear = [tf.concat(axis=n_dims - 1, values=[lin1, lin2])
              for lin1, lin2 in zip(linear[:len(linear) // 2],
                                    linear[len(linear) // 2:])]
  if n_splits == 1:
    linear = [linear]
  for i, split in enumerate(linear):
    split = func(split)
    split.set_shape(shape_to_set)
    linear[i] = split
  if n_splits == 1:
    return linear[0]
  else:
    return linear
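# On the gated-activation handling above: for func in {gated_tanh,
# gated_identity}, twice as many linear splits are produced and each
# half-pair is concatenated so the activation can split it back apart.
# A sketch of what such a gated function typically computes, GLU-style
# (an assumption about the codebase's linalg definitions, not taken from
# the original source):
#
#   def gated_tanh(x):
#     """tanh half gated by a sigmoid half."""
#     a, g = tf.split(axis=-1, num_or_size_splits=2, value=x)
#     return tf.tanh(a) * tf.sigmoid(g)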
def linear(self, inputs, output_size, add_bias=False):
  """Dropout plus a linear map, collecting orthogonality and covariance
  regularizers during training."""

  n_dims = len(inputs.get_shape().as_list())
  batch_size = tf.shape(inputs)[0]
  bucket_size = tf.shape(inputs)[1]
  input_size = inputs.get_shape().as_list()[-1]
  output_shape = tf.stack([batch_size] + [bucket_size] * (n_dims - 2) + [output_size])
  shape_to_set = [tf.Dimension(None)] * (n_dims - 1) + [tf.Dimension(output_size)]

  if self.moving_params is None:
    if self.drop_gradually:
      # Anneal dropout: interpolate between mlp_keep_prob and 1.
      s = self.global_sigmoid
      keep_prob = s + (1 - s) * self.mlp_keep_prob
    else:
      keep_prob = self.mlp_keep_prob
  else:
    keep_prob = 1
  if isinstance(keep_prob, tf.Tensor) or keep_prob < 1:
    noise_shape = tf.stack([batch_size] + [1] * (n_dims - 2) + [input_size])
    inputs = tf.nn.dropout(inputs, keep_prob, noise_shape=noise_shape)

  lin = linalg.linear(inputs,
                      output_size,
                      add_bias=add_bias,
                      moving_params=self.moving_params)
  lin.set_shape(shape_to_set)

  if self.moving_params is None:
    with tf.variable_scope('Linear', reuse=True):
      W = tf.get_variable('Weights')
      I = tf.diag(tf.ones([output_size]))
      WTWmI = tf.matmul(W, W, transpose_a=True) - I
      tf.add_to_collection('ortho_losses', tf.nn.l2_loss(WTWmI))
    tf.add_to_collection('covar_losses', self.covar_loss(lin))
  return lin