Example #1
    def __call__(self, inputs, state, scope=None):
        """"""

        with tf.variable_scope(scope or type(self).__name__):
            cell_tm1, hidden_tm1 = tf.split(axis=1,
                                            num_or_size_splits=2,
                                            value=state)
            with tf.variable_scope('Gates'):
                linear = linalg.linear([inputs, hidden_tm1],
                                       self.output_size,
                                       add_bias=True,
                                       n_splits=2,
                                       moving_params=self.moving_params)
                update_act, reset_act = linear
                update_gate = linalg.sigmoid(update_act - self.forget_bias)
                reset_gate = linalg.sigmoid(reset_act)
                reset_state = reset_gate * hidden_tm1
            with tf.variable_scope('Candidate'):
                hidden_act = linalg.linear([inputs, reset_state],
                                           self.output_size,
                                           add_bias=True,
                                           moving_params=self.moving_params)
                hidden_tilde = self.recur_func(hidden_act)
            cell_t = update_gate * cell_tm1 + (1 - update_gate) * hidden_tilde
        return cell_t, tf.concat(axis=1, values=[cell_t, cell_t])
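
The cell above couples its gates: a single update gate interpolates between the previous state and the candidate, and subtracting forget_bias from the update activation biases the cell toward remembering early in training. Below is a minimal NumPy sketch of that arithmetic; the weight names are illustrative and tanh stands in for recur_func, neither comes from the repository.

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def gru_step(x, h_prev, W_z, W_r, W_h, forget_bias=0.0):
    xh = np.concatenate([x, h_prev], axis=-1)
    z = sigmoid(xh @ W_z - forget_bias)      # update gate, shifted as above
    r = sigmoid(xh @ W_r)                    # reset gate
    xr = np.concatenate([x, r * h_prev], axis=-1)
    h_tilde = np.tanh(xr @ W_h)              # candidate state
    return z * h_prev + (1.0 - z) * h_tilde  # same interpolation as cell_t

rng = np.random.default_rng(0)
d_in, d_hid = 4, 3
x = rng.normal(size=(2, d_in))
h = np.zeros((2, d_hid))
W_z, W_r, W_h = (rng.normal(size=(d_in + d_hid, d_hid)) for _ in range(3))
h = gru_step(x, h, W_z, W_r, W_h)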
Example #2
    def conditional_linear_classifier(self,
                                      inputs,
                                      n_classes,
                                      probs,
                                      add_bias=True):
        """"""

        input_shape = tf.shape(inputs)
        batch_size = input_shape[0]
        bucket_size = input_shape[1]
        input_size = inputs.get_shape().as_list()[-1]

        if len(probs.get_shape().as_list()) == 2:
            probs = tf.to_float(
                tf.one_hot(tf.to_int64(probs), bucket_size, 1, 0))
        else:
            probs = tf.stop_gradient(probs)

        if self.moving_params is None:
            keep_prob = self.mlp_keep_prob
        else:
            keep_prob = 1
        if isinstance(keep_prob, tf.Tensor) or keep_prob < 1:
            noise_shape = tf.stack([batch_size, 1, 1, input_size])
            inputs = tf.nn.dropout(inputs, keep_prob, noise_shape=noise_shape)

        lin = linalg.linear(inputs,
                            n_classes,
                            add_bias=add_bias,
                            initializer=tf.zeros_initializer(),
                            moving_params=self.moving_params)
        weighted_lin = tf.matmul(lin, tf.expand_dims(probs, 3), adjoint_a=True)

        return weighted_lin, lin
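
With adjoint_a=True, the batched matmul contracts the bucket axis of lin against probs, so each token's class scores become a probability-weighted average over its candidate positions. A small NumPy sketch of that contraction, with illustrative shapes:

import numpy as np

B, T, C = 2, 5, 7
lin = np.random.randn(B, T, T, C)              # class scores per candidate
probs = np.zeros((B, T, T))
probs[..., 0] = 1.0                            # e.g. hard one-hot predictions
weighted = np.einsum('btjc,btj->btc', lin, probs)
assert np.allclose(weighted, lin[:, :, 0, :])  # one-hot selects one candidate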
Example #3
    def linear_classifier(self,
                          inputs,
                          n_classes,
                          add_bias=True,
                          keep_prob=None):
        """"""

        n_dims = len(inputs.get_shape().as_list())
        batch_size = tf.shape(inputs)[0]
        bucket_size = tf.shape(inputs)[1]
        input_size = inputs.get_shape().as_list()[-1]
        output_size = n_classes
        output_shape = tf.stack([batch_size] + [bucket_size] * (n_dims - 2) +
                                [output_size])

        if self.moving_params is None:
            if keep_prob is None:
                keep_prob = self.mlp_keep_prob
        else:
            keep_prob = 1
        if isinstance(keep_prob, tf.Tensor) or keep_prob < 1:
            noise_shape = tf.stack([batch_size] + [1] * (n_dims - 2) +
                                   [input_size])
            inputs = tf.nn.dropout(inputs, keep_prob, noise_shape=noise_shape)

        inputs = tf.reshape(inputs, [-1, input_size])
        output = linalg.linear(inputs,
                               output_size,
                               add_bias=add_bias,
                               initializer=tf.zeros_initializer(),
                               moving_params=self.moving_params)
        output = tf.reshape(output, output_shape)
        output.set_shape([tf.Dimension(None)] * (n_dims - 1) +
                         [tf.Dimension(output_size)])
        return output
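
The noise_shape argument above ties the dropout mask across the bucket dimension: a feature is either kept or dropped for an entire sequence rather than per token. A quick check of that behavior, assuming a TensorFlow 1.x runtime like the snippets here:

import numpy as np
import tensorflow as tf

x = tf.ones([2, 5, 4])  # [batch, bucket, features]
dropped = tf.nn.dropout(x, keep_prob=0.5, noise_shape=[2, 1, 4])
with tf.Session() as sess:
    out = sess.run(dropped)
# every bucket position shares one mask per (batch, feature) pair
assert np.allclose(out[:, 0, :], out[:, 1, :])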
Example #4
    def __call__(self, inputs, state, scope=None):
        """"""

        if self.recur_diag_bilin:
            inputs1, inputs2 = tf.split(axis=1,
                                        num_or_size_splits=2,
                                        value=inputs)
            inputs = tf.concat(axis=1,
                               values=[inputs1 * inputs2, inputs1, inputs2])
        with tf.variable_scope(scope or type(self).__name__):
            cell_tm1, hidden_tm1 = tf.split(axis=1,
                                            num_or_size_splits=2,
                                            value=state)
            linear = linalg.linear([inputs, hidden_tm1],
                                   self.output_size,
                                   add_bias=True,
                                   n_splits=3,
                                   moving_params=self.moving_params)
            cell_act, update_act, output_act = linear

            cell_tilde_t = cell_act
            update_gate = linalg.sigmoid(update_act - self.forget_bias)
            output_gate = linalg.sigmoid(output_act)
            cell_t = update_gate * cell_tilde_t + (1 - update_gate) * cell_tm1
            hidden_tilde_t = self.recur_func(cell_t)
            hidden_t = hidden_tilde_t * output_gate

            if self.hidden_include_prob < 1 and self.moving_params is None:
                hidden_mask = tf.nn.dropout(
                    tf.ones_like(hidden_t),
                    self.hidden_include_prob) * self.hidden_include_prob
                hidden_t = hidden_mask * hidden_t + (1 -
                                                     hidden_mask) * hidden_tm1
            if self.cell_include_prob < 1 and self.moving_params is None:
                cell_mask = tf.nn.dropout(
                    tf.ones_like(cell_t),
                    self.cell_include_prob) * self.cell_include_prob
                cell_t = cell_mask * cell_t + (1 - cell_mask) * cell_tm1

            return hidden_t, tf.concat(axis=1, values=[cell_t, hidden_t])
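
The dropout-on-ones trick above yields an exact 0/1 mask: tf.nn.dropout scales kept units by 1 / include_prob, so multiplying the result by include_prob restores plain ones and zeros, and the mask then swaps individual units of the new state back to the old one. A NumPy sketch of the same mixing, with illustrative names:

import numpy as np

def mix_states(new, old, include_prob, rng):
    # equivalent to tf.nn.dropout(tf.ones_like(new), include_prob) * include_prob
    mask = (rng.random(new.shape) < include_prob).astype(new.dtype)
    return mask * new + (1.0 - mask) * old

rng = np.random.default_rng(0)
hidden_t = mix_states(np.ones((2, 3)), np.zeros((2, 3)), 0.75, rng)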
Example #5
    def double_MLP(self, inputs, n_splits=1):
        """"""

        batch_size = tf.shape(inputs)[0]
        bucket_size = tf.shape(inputs)[1]
        input_size = inputs.get_shape().as_list()[-1]
        output_size = self.mlp_size
        output_shape = tf.stack(
            [batch_size, bucket_size, bucket_size, output_size])
        shape_to_set = [
            tf.Dimension(None),
            tf.Dimension(None),
            tf.Dimension(None),
            tf.Dimension(output_size)
        ]

        if self.moving_params is None:
            if self.drop_gradually:
                s = self.global_sigmoid
                keep_prob = s + (1 - s) * self.mlp_keep_prob
            else:
                keep_prob = self.mlp_keep_prob
        else:
            keep_prob = 1
        if isinstance(keep_prob, tf.Tensor) or keep_prob < 1:
            noise_shape = tf.stack([batch_size, 1, input_size])
            inputs = tf.nn.dropout(inputs, keep_prob, noise_shape=noise_shape)

        lin1, lin2 = linalg.linear(inputs,
                                   output_size * n_splits,
                                   n_splits=2,
                                   add_bias=True,
                                   moving_params=self.moving_params)
        lin1 = tf.reshape(tf.transpose(lin1, [0, 2, 1]),
                          tf.stack([-1, bucket_size, 1]))
        lin2 = tf.reshape(tf.transpose(lin2, [0, 2, 1]),
                          tf.stack([-1, 1, bucket_size]))
        lin = lin1 + lin2
        lin = tf.reshape(
            lin,
            tf.stack(
                [batch_size, n_splits * output_size, bucket_size,
                 bucket_size]))
        lin = tf.transpose(lin, [0, 2, 3, 1])
        top_mlps = tf.split(axis=3,
                            num_or_size_splits=n_splits,
                            value=self.mlp_func(lin))
        for top_mlp in top_mlps:
            top_mlp.set_shape(shape_to_set)
        if self.moving_params is None:
            with tf.variable_scope('Linear', reuse=True):
                matrix = tf.get_variable('Weights')
                I = tf.diag(tf.ones([self.mlp_size]))
                for W in tf.split(axis=1,
                                  num_or_size_splits=2 * n_splits,
                                  value=matrix):
                    WTWmI = tf.matmul(W, W, transpose_a=True) - I
                    tf.add_to_collection('ortho_losses', tf.nn.l2_loss(WTWmI))
            for split in top_mlps:
                tf.add_to_collection('covar_losses', self.covar_loss(split))
        if n_splits == 1:
            return top_mlps[0]
        else:
            return top_mlps
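
The reshapes above set up a broadcast: a [*, bucket, 1] tensor plus a [*, 1, bucket] tensor yields one hidden activation per ordered token pair, so a single affine layer scores every (i, j) combination at once. The same idea in NumPy, with illustrative shapes:

import numpy as np

T, O = 5, 3                      # bucket_size, output_size
lin1 = np.random.randn(O, T, 1)  # one column per token
lin2 = np.random.randn(O, 1, T)  # one row per token
pairwise = lin1 + lin2           # (O, T, T): entry [:, i, j] mixes tokens i and j
assert np.allclose(pairwise[:, 2, 4], lin1[:, 2, 0] + lin2[:, 0, 4])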
Example #6
    def __call__(self, inputs, state, scope=None):
        """"""

        with tf.variable_scope(scope or type(self).__name__):
            cell_tm1, hidden_tm1 = tf.split(axis=1,
                                            num_or_size_splits=2,
                                            value=state)
            linear = linalg.linear([inputs, hidden_tm1],
                                   self.output_size,
                                   add_bias=False,
                                   n_splits=4,
                                   moving_params=self.moving_params)
            with tf.variable_scope('Linear'):
                biases = tf.get_variable('Biases', [3 * self.output_size],
                                         initializer=tf.zeros_initializer())
            biases = tf.split(axis=0, num_or_size_splits=3, value=biases)
            cell_act, input_act, forget_act, output_act = linear
            input_bias, forget_bias, output_bias = biases

            cell_tilde_t = linalg.tanh(cell_act)
            input_gate = linalg.sigmoid(input_act + input_bias)
            forget_gate = linalg.sigmoid(forget_act + forget_bias -
                                         self.forget_bias)
            output_gate = linalg.sigmoid(output_act + output_bias)
            cell_t = input_gate * cell_tilde_t + (1 - forget_gate) * cell_tm1
            hidden_tilde_t = self.recur_func(cell_t)
            hidden_t = hidden_tilde_t * output_gate

            return hidden_t, tf.concat(axis=1, values=[cell_t, hidden_t])
Example #7
  def double_MLP(self, inputs, n_splits=1):
    """Pairwise MLP over all token pairs, sized for attention."""

    batch_size = tf.shape(inputs)[0]
    bucket_size = tf.shape(inputs)[1]
    input_size = inputs.get_shape().as_list()[-1]
    output_size = self.attn_mlp_size
    output_shape = tf.stack([batch_size, bucket_size, bucket_size, output_size])
    shape_to_set = [tf.Dimension(None), tf.Dimension(None),
                    tf.Dimension(None), tf.Dimension(output_size)]

    if self.moving_params is None:
      keep_prob = self.mlp_keep_prob
    else:
      keep_prob = 1
    if isinstance(keep_prob, tf.Tensor) or keep_prob < 1:
      noise_shape = tf.stack([batch_size, 1, input_size])
      inputs = tf.nn.dropout(inputs, keep_prob, noise_shape=noise_shape)

    lin1, lin2 = linalg.linear(inputs,
                               output_size * n_splits,
                               n_splits=2,
                               add_bias=True,
                               moving_params=self.moving_params)
    lin1 = tf.reshape(tf.transpose(lin1, [0, 2, 1]),
                      tf.stack([-1, bucket_size, 1]))
    lin2 = tf.reshape(tf.transpose(lin2, [0, 2, 1]),
                      tf.stack([-1, 1, bucket_size]))
    lin = lin1 + lin2
    lin = tf.reshape(lin, tf.stack([batch_size, n_splits * output_size,
                                    bucket_size, bucket_size]))
    lin = tf.transpose(lin, [0, 2, 3, 1])
    top_mlps = tf.split(axis=3, num_or_size_splits=n_splits,
                        value=self.mlp_func(lin))
    for top_mlp in top_mlps:
      top_mlp.set_shape(shape_to_set)
    if n_splits == 1:
      return top_mlps[0]
    else:
      return top_mlps
Example #8
  def linear(self, inputs, output_size, n_splits=1, add_bias=False):
    """"""
    
    n_dims = len(inputs.get_shape().as_list())
    batch_size = tf.shape(inputs)[0]
    bucket_size = tf.shape(inputs)[1]
    input_size = inputs.get_shape().as_list()[-1]
    output_shape = tf.stack([batch_size] + [bucket_size]*(n_dims-2) + [output_size])
    shape_to_set = [tf.Dimension(None)]*(n_dims-1) + [tf.Dimension(output_size)]
    
    if self.moving_params is None:
      keep_prob = self.info_keep_prob
    else:
      keep_prob = 1
    
    if isinstance(keep_prob, tf.Tensor) or keep_prob < 1:
      noise_shape = tf.stack([batch_size] + [1]*(n_dims-2) + [input_size])
      inputs = tf.nn.dropout(inputs, keep_prob, noise_shape=noise_shape)

    lin = linalg.linear(inputs,
                        output_size,
                        n_splits=n_splits,
                        add_bias=add_bias,
                        moving_params=self.moving_params)
    if n_splits == 1:
      lin = [lin]
    for i, split in enumerate(lin):
      split.set_shape(shape_to_set)
    if n_splits == 1:
      return lin[0]
    else:
      return lin
Example #9
    def __call__(self, inputs, state, scope=None):
        """"""

        with tf.variable_scope(scope or type(self).__name__):
            hidden_act = linalg.linear([inputs, state],
                                       self.output_size,
                                       add_bias=False,
                                       moving_params=self.moving_params)
            hidden = self.recur_func(hidden_act)
        return hidden, hidden
Example #10
    def __call__(self, inputs, state, scope=None):
        """"""

        if self.recur_diag_bilin:
            inputs1, inputs2 = tf.split(axis=1,
                                        num_or_size_splits=2,
                                        value=inputs)
            inputs = tf.concat(axis=1,
                               values=[inputs1 * inputs2, inputs1, inputs2])
        with tf.variable_scope(scope or type(self).__name__):
            hidden_act = linalg.linear([inputs, state],
                                       self.output_size,
                                       add_bias=False,
                                       moving_params=self.moving_params)
            hidden = self.recur_func(hidden_act)
        return hidden, hidden
Example #11
    def __call__(self, inputs, state, scope=None):
        """"""

        with tf.variable_scope(scope or type(self).__name__):
            with tf.variable_scope('Gates'):
                linear = linalg.linear([inputs, state],
                                       self.output_size,
                                       add_bias=True,
                                       n_splits=2,
                                       moving_params=self.moving_params)
                update_act, reset_act = linear
                update_gate = linalg.sigmoid(update_act - self.forget_bias)
                reset_gate = linalg.sigmoid(reset_act)
                reset_state = reset_gate * state
            with tf.variable_scope('Candidate'):
                hidden_act = linalg.linear([inputs, reset_state],
                                           self.output_size,
                                           add_bias=False,
                                           moving_params=self.moving_params)
                hidden_tilde = self.recur_func(hidden_act)
            hidden = update_gate * state + (1 - update_gate) * hidden_tilde
        return hidden, hidden
Example #12
    def MLP(self, inputs, n_splits=1):
        """"""

        n_dims = len(inputs.get_shape().as_list())
        batch_size = tf.shape(inputs)[0]
        bucket_size = tf.shape(inputs)[1]
        input_size = inputs.get_shape().as_list()[-1]
        output_size = self.mlp_size
        output_shape = tf.stack([batch_size] + [bucket_size] * (n_dims - 2) +
                                [output_size])
        shape_to_set = [tf.Dimension(None)] * (n_dims - 1) + [
            tf.Dimension(output_size)
        ]

        if self.moving_params is None:
            if self.drop_gradually:
                s = self.global_sigmoid
                keep_prob = s + (1 - s) * self.mlp_keep_prob
            else:
                keep_prob = self.mlp_keep_prob
        else:
            keep_prob = 1
        if isinstance(keep_prob, tf.Tensor) or keep_prob < 1:
            noise_shape = tf.stack([batch_size] + [1] * (n_dims - 2) +
                                   [input_size])
            inputs = tf.nn.dropout(inputs, keep_prob, noise_shape=noise_shape)

        linear = linalg.linear(inputs,
                               output_size,
                               n_splits=n_splits,
                               add_bias=True,
                               moving_params=self.moving_params)
        if n_splits == 1:
            linear = [linear]
        for i, split in enumerate(linear):
            split = self.mlp_func(split)
            split.set_shape(shape_to_set)
            linear[i] = split
        if self.moving_params is None:
            with tf.variable_scope('Linear', reuse=True):
                matrix = tf.get_variable('Weights')
                I = tf.diag(tf.ones([self.mlp_size]))
                for W in tf.split(axis=1,
                                  num_or_size_splits=n_splits,
                                  value=matrix):
                    WTWmI = tf.matmul(W, W, transpose_a=True) - I
                    tf.add_to_collection('ortho_losses', tf.nn.l2_loss(WTWmI))
            for split in linear:
                tf.add_to_collection('covar_losses', self.covar_loss(split))
        if n_splits == 1:
            return linear[0]
        else:
            return linear
Example #13
    def __call__(self, inputs, state, scope=None):
        """"""

        with tf.compat.v1.variable_scope(scope or type(self).__name__):
            cell_tm1, hidden_tm1 = tf.split(axis=1,
                                            num_or_size_splits=2,
                                            value=state)
            if self.recur_diag_bilin:
                inputs1, inputs2 = tf.split(axis=1,
                                            num_or_size_splits=2,
                                            value=inputs)
                input_list = [inputs1 * inputs2, inputs1 + inputs2, hidden_tm1]
            else:
                input_list = [inputs, hidden_tm1]
            linear = linalg.linear(input_list,
                                   self.output_size,
                                   add_bias=False,
                                   n_splits=4,
                                   moving_params=self.moving_params)
            with tf.compat.v1.variable_scope('Linear'):
                biases = tf.compat.v1.get_variable(
                    'Biases', [4 * self.output_size],
                    initializer=tf.zeros_initializer())
            biases = tf.split(axis=0, num_or_size_splits=4, value=biases)
            cell_act, input_act, forget_act, output_act = linear
            cell_bias, input_bias, forget_bias, output_bias = biases

            cell_tilde_t = linalg.tanh(cell_act + cell_bias)
            input_gate = linalg.sigmoid(input_act + input_bias)
            forget_gate = linalg.sigmoid(forget_act + forget_bias -
                                         self.forget_bias)
            output_gate = linalg.sigmoid(output_act + output_bias)
            cell_t = input_gate * cell_tilde_t + (1 - forget_gate) * cell_tm1
            hidden_tilde_t = self.recur_func(cell_t)
            hidden_t = hidden_tilde_t * output_gate

            if self.hidden_include_prob < 1 and self.moving_params is None:
                hidden_mask = tf.nn.dropout(
                    tf.ones_like(hidden_t),
                    self.hidden_include_prob) * self.hidden_include_prob
                hidden_t = hidden_mask * hidden_t + (1 -
                                                     hidden_mask) * hidden_tm1
            if self.cell_include_prob < 1 and self.moving_params is None:
                cell_mask = tf.nn.dropout(
                    tf.ones_like(cell_t),
                    self.cell_include_prob) * self.cell_include_prob
                cell_t = cell_mask * cell_t + (1 - cell_mask) * cell_tm1

            return hidden_t, tf.concat(axis=1, values=[cell_t, hidden_t])
Example #14
    def MLP(self, inputs, output_size, func=None, keep_prob=None, n_splits=1):
        """"""

        n_dims = len(inputs.get_shape().as_list())
        batch_size = tf.shape(inputs)[0]
        bucket_size = tf.shape(inputs)[1]
        input_size = inputs.get_shape().as_list()[-1]
        output_shape = tf.stack([batch_size] + [bucket_size] * (n_dims - 2) +
                                [output_size])
        shape_to_set = [tf.Dimension(None)] * (n_dims - 1) + [
            tf.Dimension(output_size)
        ]
        if func is None:
            func = self.mlp_func

        if self.moving_params is None:
            if keep_prob is None:
                keep_prob = self.mlp_keep_prob
        else:
            keep_prob = 1
        if isinstance(keep_prob, tf.Tensor) or keep_prob < 1:
            noise_shape = tf.stack([batch_size] + [1] * (n_dims - 2) +
                                   [input_size])
            inputs = tf.nn.dropout(inputs, keep_prob, noise_shape=noise_shape)

        linear = linalg.linear(
            inputs,
            output_size,
            n_splits=n_splits *
            (1 + (func.__name__ in ('gated_tanh', 'gated_identity'))),
            add_bias=True,
            moving_params=self.moving_params)
        if func.__name__ in ('gated_tanh', 'gated_identity'):
            linear = [
                tf.concat(axis=n_dims - 1, values=[lin1, lin2])
                for lin1, lin2 in zip(linear[:len(linear) //
                                             2], linear[len(linear) // 2:])
            ]
        # the gated branch above already yields a list, so wrap bare tensors only
        if not isinstance(linear, (list, tuple)):
            linear = [linear]
        for i, split in enumerate(linear):
            split = func(split)
            split.set_shape(shape_to_set)
            linear[i] = split
        if n_splits == 1:
            return linear[0]
        else:
            return linear
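
The doubled n_splits above implies that the gated activations consume two linear maps concatenated on the last axis. A plausible definition, assuming gated_tanh halves its input and multiplies a tanh branch by a sigmoid gate (an assumption; the real definition lives in the repository's activation module):

import numpy as np

def gated_tanh(x):
    a, b = np.split(x, 2, axis=-1)       # the two concatenated linear maps
    gate = 1.0 / (1.0 + np.exp(-b))      # sigmoid gate
    return np.tanh(a) * gate

out = gated_tanh(np.random.randn(2, 6))  # -> shape (2, 3)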
Example #15
    def linear(self, inputs, output_size, add_bias=False):
        """"""

        n_dims = len(inputs.get_shape().as_list())
        batch_size = tf.shape(inputs)[0]
        bucket_size = tf.shape(inputs)[1]
        input_size = inputs.get_shape().as_list()[-1]
        output_shape = tf.stack([batch_size] + [bucket_size] * (n_dims - 2) +
                                [output_size])
        shape_to_set = [tf.Dimension(None)] * (n_dims - 1) + [
            tf.Dimension(output_size)
        ]

        if self.moving_params is None:
            if self.drop_gradually:
                s = self.global_sigmoid
                keep_prob = s + (1 - s) * self.mlp_keep_prob
            else:
                keep_prob = self.mlp_keep_prob
        else:
            keep_prob = 1
        if isinstance(keep_prob, tf.Tensor) or keep_prob < 1:
            noise_shape = tf.stack([batch_size] + [1] * (n_dims - 2) +
                                   [input_size])
            inputs = tf.nn.dropout(inputs, keep_prob, noise_shape=noise_shape)

        lin = linalg.linear(inputs,
                            output_size,
                            add_bias=add_bias,
                            moving_params=self.moving_params)
        lin.set_shape(shape_to_set)
        if self.moving_params is None:
            with tf.variable_scope('Linear', reuse=True):
                W = tf.get_variable('Weights')
                I = tf.diag(tf.ones([output_size]))
                WTWmI = tf.matmul(W, W, transpose_a=True) - I
                tf.add_to_collection('ortho_losses', tf.nn.l2_loss(WTWmI))
            tf.add_to_collection('covar_losses', self.covar_loss(lin))
        return lin
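
The penalty collected above is ||W^T W - I||^2 / 2, which pushes the columns of the weight matrix toward an orthonormal set; tf.nn.l2_loss supplies the half sum of squares. A standalone check in NumPy:

import numpy as np

def ortho_loss(W):
    I = np.eye(W.shape[1])
    WTWmI = W.T @ W - I
    return 0.5 * np.sum(WTWmI ** 2)  # matches tf.nn.l2_loss(WTWmI)

W = np.linalg.qr(np.random.randn(8, 4))[0]  # orthonormal columns
assert ortho_loss(W) < 1e-12                # near zero for orthonormal W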