Example #1
def gru_cell(prev_hidden: tf.Tensor, input: tf.Tensor) -> tf.Tensor:
    """Compute a single step of a GRU (following the PyTorch parameterization).

    See PyTorch GRUCell, https://pytorch.org/docs/stable/generated/torch.nn.GRUCell.html
    for the definition of this operation & trainable variables.

    Arguments:

      prev_hidden -- shape (batch_size x hidden_size), the previous GRU state,
                     e.g. returned by gru_cell

      input -- shape (batch_size x input_size)

    Returns:

      tensor of shape (batch_size x hidden_size), a new GRU state
    """
    batch_size, hidden_size = assert_shape(prev_hidden, (None, None))
    _, input_size = assert_shape(input, (batch_size, None))
    dtype = prev_hidden.dtype

    weight_i = tf.get_variable(
        "weight_i",
        (3, input_size, hidden_size),
        dtype=dtype,
        initializer=tf.glorot_normal_initializer(),
    )
    bias_i = tf.get_variable("bias_i", (3, hidden_size),
                             dtype=dtype,
                             initializer=tf.zeros_initializer())
    weight_h = tf.get_variable(
        "weight_h",
        (3, hidden_size, hidden_size),
        dtype=dtype,
        initializer=tf.glorot_normal_initializer(),
    )
    bias_h = tf.get_variable("bias_h", (3, hidden_size),
                             dtype=dtype,
                             initializer=tf.zeros_initializer())

    reset_i, update_i, candidate_i = tf.unstack(input @ weight_i +
                                                tf.expand_dims(bias_i, 1))
    reset_h, update_h, candidate_h = tf.unstack(prev_hidden @ weight_h +
                                                tf.expand_dims(bias_h, 1))

    reset = tf.sigmoid(reset_i + reset_h)
    update = tf.sigmoid(update_i + update_h)
    candidate = tf.tanh(candidate_i + reset * candidate_h)
    return (1 - update) * candidate + update * prev_hidden
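
A minimal usage sketch, not part of the original example: assuming TF 1.x graph mode and that the assert_shape helper used above is in scope, the cell creates its variables with tf.get_variable, so repeated calls must share a variable scope with reuse enabled.

import tensorflow as tf

seq = tf.placeholder(tf.float32, [32, 10, 8])       # (batch, time, input_size)
state = tf.zeros((32, 16), dtype=tf.float32)        # (batch, hidden_size)
for t in range(10):
    with tf.variable_scope("gru", reuse=(t > 0)):   # reuse weights after step 0
        state = gru_cell(state, seq[:, t, :])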
Example #2
 def _convolutional_feature_extractor(self, stft, conv_layer_dropout):
     """ THE FUNCTION BUILDS THE END-TO-END FEATURE EXTRACTION COMPONENT CONSISTING
             OF 2 CONVOLUTIONAL LAYERS COMBINED WITH 2 MAX POOLING LAYERS
                 -Arguments:
                     stft: the framed melspectrograms of the audio files
                 -Returns:
                     conv_out: the features "extracted" after filtering the melspectogram through the convolutional layers   
         """
     self.init = tf.glorot_normal_initializer()
     with tf.variable_scope("Convbb",
                            reuse=tf.AUTO_REUSE,
                            initializer=self.init):
         stft = batch_normalization(stft)
         conv1 = self.conv_layer(input_data=tf.expand_dims(stft, axis=3),
                                 filter_size=8,
                                 channels_in=1,
                                 channels_out=32,
                                 strides=[1, 2, 2, 1],
                                 conv_layer_dropout=conv_layer_dropout,
                                 name="conv1")
         conv2 = tf.nn.max_pool(conv1, [1, 2, 2, 1], [1, 2, 2, 1],
                                padding="SAME")
         conv2 = batch_normalization(conv2)
         conv3 = self.conv_layer(input_data=conv2,
                                 filter_size=4,
                                 channels_in=32,
                                 channels_out=16,
                                 strides=[1, 2, 2, 1],
                                 conv_layer_dropout=conv_layer_dropout,
                                 name="conv3")
         conv3 = tf.nn.max_pool(conv3, [1, 2, 2, 1], [1, 2, 2, 1],
                                padding="SAME")
         conv3 = batch_normalization(conv3)
         conv_out = tf.reshape(conv3, (-1, 256))
     return conv_out
Example #3
    def __init__(self,
                 name,
                 in_channels,
                 num_layers,
                 growth_rate,
                 dropout_rate,
                 bottleneck,
                 build_method=Weights.impl.sandbox,
                 ranks=None,
                 kernel_initializer=tf.glorot_normal_initializer(),
                 bias_initializer=tf.zeros_initializer(),
                 kernel_regularizer=None,
                 bias_regularizer=None):
        """

        :param name: Variable scope
        :param N: How many layers
        """

        super().__init__()
        self.name = name
        self.in_channels = in_channels
        self.num_layers = num_layers
        self.growth_rate = growth_rate
        self.build_method = build_method
        self.dropout_rate = dropout_rate
        self.bottleneck = bottleneck
        self.ranks = ranks

        self.kernel_initializer = kernel_initializer
        self.bias_initializer = bias_initializer
        self.kernel_regularizer = kernel_regularizer
        self.bias_regularizer = bias_regularizer
Example #4
 def _get_variable(self, name, shape, initializer=None):
   if initializer is None:
     initializer = getattr(
       self, '_weight_initializer', tf.glorot_normal_initializer())
   else:
     assert callable(initializer)
   return tf.get_variable(
     name, shape, dtype=hub.dtype, initializer=initializer)
Example #5
def get_initializer(initializer, dtype):
    if initializer == 'zeros':
        return tf.zeros_initializer(dtype=dtype)
    elif initializer == 'ones':
        return tf.ones_initializer(dtype=dtype)
    elif initializer == 'vs':
        return tf.variance_scaling_initializer(dtype=dtype)
    elif initializer == 'xavier':
        return tf.glorot_normal_initializer(dtype=dtype)
    elif initializer == 'he':
        # He initialization doubles the variance (the default scale of 1.0
        # would make this identical to the 'vs' branch above).
        return tf.variance_scaling_initializer(scale=2.0, dtype=dtype)
    else:
        raise NotImplementedError(initializer)
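
A short, hypothetical usage sketch: mapping config strings to initializers when creating variables (the names and shapes below are illustrative and assume TF 1.x).

import tensorflow as tf

kernel_init = get_initializer('xavier', tf.float32)
bias_init = get_initializer('zeros', tf.float32)
w = tf.get_variable('w', shape=(128, 64), initializer=kernel_init)
b = tf.get_variable('b', shape=(64,), initializer=bias_init)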
Example #6
def feedforward_network(inputStates, inputSize, outputSize, num_fc_layers,
                        depth_fc_layers, tf_datatype, scope):

    with tf.variable_scope(str(scope)):

        #concat K entries together [bs x K x sa] --> [bs x ksa]
        inputState = tf.layers.flatten(inputStates)

        #vars
        intermediate_size = depth_fc_layers
        reuse = False
        initializer = tf.glorot_normal_initializer(
            seed=None, dtype=tf_datatype)
        fc = tf.layers.dense

        # make hidden layers: each layer reads the previous activation
        # (the flattened input for the first layer)
        h_i = inputState
        for i in range(num_fc_layers):
            fc_i = fc(
                h_i,
                units=intermediate_size,
                activation=None,
                kernel_initializer=initializer,
                bias_initializer=initializer,
                reuse=reuse,
                trainable=True)
            h_i = tf.nn.relu(fc_i)

        # make output layer
        z = fc(
            h_i,
            units=outputSize,
            activation=None,
            kernel_initializer=initializer,
            bias_initializer=initializer,
            reuse=reuse,
            trainable=True)

    return z
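
A hypothetical call, assuming TF 1.x graph mode. Note that inputSize is accepted but unused, since tf.layers.flatten infers the flattened size itself.

import tensorflow as tf

states = tf.placeholder(tf.float32, [None, 3, 20])   # [bs x K x sa]
pred = feedforward_network(states, inputSize=60, outputSize=7,
                           num_fc_layers=2, depth_fc_layers=64,
                           tf_datatype=tf.float32, scope="dyn_model")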
Example #7
def model_fn(model, features, labels, mode):
    global_step = tf.train.get_or_create_global_step()
    xavier_initializer = tf.glorot_normal_initializer()

    fc1_size = 128
    with tf.variable_scope('leader'):
        w1f = tf.get_variable('w1f',
                              shape=[fc1_size, 1],
                              dtype=tf.float32,
                              initializer=tf.random_uniform_initializer(
                                  -0.01, 0.01))
        b1f = tf.get_variable('b1f',
                              shape=[1],
                              dtype=tf.float32,
                              initializer=tf.zeros_initializer())

    if mode == tf.estimator.ModeKeys.TRAIN:
        embedding = model.recv('embedding', tf.float32, require_grad=True)
    else:
        embedding = features['embedding']

    logits = tf.nn.bias_add(tf.matmul(embedding, w1f), b1f)

    if mode == tf.estimator.ModeKeys.TRAIN:
        y = tf.dtypes.cast(labels['y'], tf.float32)
        loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=logits)
        loss = tf.math.reduce_mean(loss)

        # calculate AUC
        pred = tf.math.sigmoid(logits)
        _, auc = tf.metrics.auc(labels=y, predictions=pred)

        logging_hook = tf.train.LoggingTensorHook(
            {"loss": loss, "auc": auc}, every_n_iter=10)

        optimizer = tf.train.GradientDescentOptimizer(0.1)
        train_op = model.minimize(optimizer, loss, global_step=global_step)
        return model.make_spec(mode,
                               loss=loss,
                               train_op=train_op,
                               training_hooks=[logging_hook])

    if mode == tf.estimator.ModeKeys.PREDICT:
        return model.make_spec(mode, predictions=logits)
Example #8
    def __init__(self,
                 shape,
                 build_method=Weights.impl.sandbox,
                 use_bias=True,
                 kernel_initializer=tf.glorot_normal_initializer(),
                 bias_initializer=tf.zeros_initializer(),
                 kernel_regularizer=None,
                 bias_regularizer=None):

        super().__init__()
        self._shape = shape
        self._build_method = build_method
        self._use_bias = use_bias

        self.kernel_initializer = kernel_initializer
        self.bias_initializer = bias_initializer
        self.kernel_regularizer = kernel_regularizer
        self.bias_regularizer = bias_regularizer
Example #9
 def _encoders(self):
     initializer = tf.glorot_normal_initializer()
     entity_encoder = gl.encoders.LookupEncoder(self.entity_num,
                                                self.hidden_dim,
                                                str2hash=self.s2h,
                                                ps_hosts=self.ps_hosts,
                                                init=initializer,
                                                name='entity_encoder')
     relation_encoder = gl.encoders.LookupEncoder(self.relation_num,
                                                  self.hidden_dim,
                                                  str2hash=self.s2h,
                                                  ps_hosts=self.ps_hosts,
                                                  init=initializer,
                                                  use_edge=True,
                                                  name='relation_encoder')
     return {
         "src": entity_encoder,
         "edge": relation_encoder,
         "dst": entity_encoder
     }
Example #10
def linear(input: tf.Tensor,
           n_output: int,
           use_bias: bool = True) -> tf.Tensor:
    """A standard linear layer `W x + b`."""
    weight = tf.get_variable(
        "weight",
        dtype=input.dtype,
        shape=(input.shape[-1], n_output),
        initializer=tf.glorot_normal_initializer(),
    )
    output = input @ weight
    if use_bias:
        bias = tf.get_variable(
            "bias",
            dtype=input.dtype,
            shape=(n_output, ),
            initializer=tf.zeros_initializer(),
        )
        output += bias
    return output
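
A minimal sketch of composing the helper, assuming TF 1.x graph mode: each call creates variables named "weight" and "bias", so separate variable scopes keep the names from colliding.

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 32])
with tf.variable_scope("fc1"):
    h = tf.nn.relu(linear(x, n_output=64))
with tf.variable_scope("fc2"):
    logits = linear(h, n_output=10, use_bias=False)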
Example #11
    def __init__(
            self,
            shape,
            strides=(1, 1),
            use_bias=True,
            padding="SAME",  # partitions=[0.8, 0.8],
            partitions=None,
            kernel_initializer=tf.glorot_normal_initializer(),
            bias_initializer=tf.zeros_initializer(),
            kernel_regularizer=None,
            bias_regularizer=None,
            ranks=None):
        """
            Custom implementation for the depthwise separable layers

            The pointwise convolution is separated across the input channel dimensions
            Whereas the depthwise + standard convolution
        """
        super().__init__()

        px = strides[0]
        py = strides[1]
        self._strides = [1, px, py, 1]

        # The two partitions
        self.partitions = partitions

        self._shape = shape
        self._padding = padding
        self._use_bias = use_bias

        self.kernel_initializer = kernel_initializer
        self.bias_initializer = bias_initializer
        self.kernel_regularizer = kernel_regularizer
        self.bias_regularizer = bias_regularizer

        # Rank for the core tensor G
        self.ranks = ranks
Example #12
 def _build_bilinear_layers(net, params):
     feat_emb = tf.reshape(net, (-1, deep_fields_size, org_emb_size))
     cnt = 0
     element_wise_product_list = []
     for i in range(0, deep_fields_size):
         for j in range(i + 1, deep_fields_size):
             with tf.variable_scope('weight_', reuse=tf.AUTO_REUSE):
                 weight = tf.get_variable(
                     name='weight_' + str(cnt),
                     shape=[org_emb_size, org_emb_size],
                     initializer=tf.glorot_normal_initializer(
                         seed=random.randint(0, 1024)),
                     dtype=tf.float32)
             element_wise_product_list.append(
                 tf.multiply(tf.matmul(feat_emb[:, i, :], weight),
                             feat_emb[:, j, :]))
             cnt += 1
     element_wise_product = tf.stack(element_wise_product_list)
     element_wise_product = tf.transpose(element_wise_product,
                                         perm=[1, 0, 2],
                                         name="element_wise_product")
     bilinear_output = tf.layers.flatten(element_wise_product)
     return bilinear_output
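
An illustrative standalone call: the helper reads deep_fields_size and org_emb_size from its enclosing scope (and params is unused), so this sketch supplies assumed values at module level.

import random
import tensorflow as tf

deep_fields_size, org_emb_size = 10, 16
net = tf.placeholder(tf.float32, [None, deep_fields_size * org_emb_size])
bilinear_output = _build_bilinear_layers(net, params=None)
# shape: (batch, 45 * 16) -- one interaction vector per field pair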
Example #13
    def test_group_lasso_conv3d(self):
        shape = [3, 3, 3]
        video = tf.zeros([2, 3, 3, 3, 1])
        net = slim.conv3d(video,
                          5,
                          shape,
                          padding='VALID',
                          weights_initializer=tf.glorot_normal_initializer(),
                          scope='vconv1')
        conv3d_op = tf.get_default_graph().get_operation_by_name(
            'vconv1/Conv3D')
        conv3d_weights = conv3d_op.inputs[1]

        threshold = 0.09
        flop_reg = flop_regularizer.GroupLassoFlopsRegularizer(
            [net.op], threshold=threshold)
        norm = tf.sqrt(tf.reduce_mean(tf.square(conv3d_weights), [0, 1, 2, 3]))
        alive = tf.reduce_sum(tf.cast(norm > threshold, tf.float32))
        with self.session():
            flop_coeff = 2 * shape[0] * shape[1] * shape[2]
            tf.compat.v1.global_variables_initializer().run()
            self.assertAllClose(flop_reg.get_cost(), flop_coeff * alive)
            self.assertAllClose(flop_reg.get_regularization_term(),
                                flop_coeff * tf.reduce_sum(norm))
Example #14
def masked_dense(
        inputs,
        units,
        num_blocks=None,
        exclusive=False,
        kernel_initializer=None,
        reuse=None,
        name=None,
        *args,  # pylint: disable=keyword-arg-before-vararg
        **kwargs):
    """A autoregressively masked dense layer. Analogous to `tf.layers.dense`.

  See [Germain et al. (2015)][1] for detailed explanation.

  Arguments:
    inputs: Tensor input.
    units: Python `int` scalar representing the dimensionality of the output
      space.
    num_blocks: Python `int` scalar representing the number of blocks for the
      MADE masks.
    exclusive: Python `bool` scalar representing whether to zero the diagonal of
      the mask, used for the first layer of a MADE.
    kernel_initializer: Initializer function for the weight matrix.
      If `None` (default), weights are initialized using the
      `tf.glorot_normal_initializer`.
    reuse: Python `bool` scalar representing whether to reuse the weights of a
      previous layer by the same name.
    name: Python `str` used to describe ops managed by this function.
    *args: `tf.layers.dense` arguments.
    **kwargs: `tf.layers.dense` keyword arguments.

  Returns:
    Output tensor.

  Raises:
    NotImplementedError: if rightmost dimension of `inputs` is unknown prior to
      graph execution.

  #### References

  [1]: Mathieu Germain, Karol Gregor, Iain Murray, and Hugo Larochelle. MADE:
       Masked Autoencoder for Distribution Estimation. In _International
       Conference on Machine Learning_, 2015. https://arxiv.org/abs/1502.03509
  """
    # TODO(b/67594795): Better support of dynamic shape.
    input_depth = tf.compat.dimension_value(
        tensorshape_util.with_rank_at_least(inputs.shape, 1)[-1])
    if input_depth is None:
        raise NotImplementedError(
            'Rightmost dimension must be known prior to graph execution.')

    mask = _gen_mask(num_blocks, input_depth, units,
                     MASK_EXCLUSIVE if exclusive else MASK_INCLUSIVE).T

    if kernel_initializer is None:
        kernel_initializer = tf1.glorot_normal_initializer()

    def masked_initializer(shape, dtype=None, partition_info=None):
        return mask * kernel_initializer(shape, dtype, partition_info)

    with tf.name_scope(name or 'masked_dense'):
        layer = tf1.layers.Dense(
            units,
            kernel_initializer=masked_initializer,
            kernel_constraint=lambda x: mask * x,
            name=name,
            dtype=dtype_util.base_dtype(inputs.dtype),
            _scope=name,
            _reuse=reuse,
            *args,  # pylint: disable=keyword-arg-before-vararg
            **kwargs)
        return layer.apply(inputs)
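
A brief sketch of stacking masked_dense into a small MADE-style network, with illustrative unit counts; it assumes the function above and its helpers are importable, with tf1 being tensorflow.compat.v1 as in the surrounding source.

import tensorflow.compat.v1 as tf1
tf1.disable_v2_behavior()

x = tf1.placeholder(tf1.float32, [None, 4])               # event size 4
h = tf1.nn.relu(
    masked_dense(x, units=16, num_blocks=4, exclusive=True, name='made_h0'))
shift_and_log_scale = masked_dense(h, units=8, num_blocks=4, name='made_out')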
Example #15
def generator(inputs, is_train=True, reuse=False):
    image_size = 128
    #s32 = image_size // 32
    gf_dim = 64  # Dimension of gen filters in first conv layer. [64]
    c_dim = 1  # n_color 1
    w_init = tf.glorot_normal_initializer()
    gamma_init = tf.random_normal_initializer(1., 0.02)

    with tf.name_scope("GENERATOR"):

        with tf.variable_scope("generator", reuse=reuse):

            with tf.name_scope("net_in"):
                net_in = InputLayer(inputs, name='g/in')
        #############################################################################
            with tf.name_scope("layer0"):
                net_h0 = DenseLayer(net_in,
                                    n_units=(gf_dim * 32 * 4 * 4),
                                    W_init=w_init,
                                    act=tf.identity,
                                    name='g/h0/lin')
                net_h0 = ReshapeLayer(net_h0,
                                      shape=[-1, 4, 4, gf_dim * 32],
                                      name='g/h0/reshape')
                net_h0 = BatchNormLayer(net_h0,
                                        decay=0.9,
                                        act=tf.nn.relu,
                                        is_train=is_train,
                                        gamma_init=gamma_init,
                                        name='g/h0/batch_norm')

            with tf.name_scope("layer1"):
                net_h1 = DeConv2d(net_h0,
                                  gf_dim * 8, (5, 5),
                                  strides=(2, 2),
                                  padding='SAME',
                                  act=None,
                                  W_init=w_init,
                                  name='g/h1/decon2d')
                net_h1 = BatchNormLayer(net_h1,
                                        decay=0.9,
                                        act=tf.nn.relu,
                                        is_train=is_train,
                                        gamma_init=gamma_init,
                                        name='g/h1/batch_norm')

            with tf.name_scope("layer2"):
                net_h2 = DeConv2d(net_h1,
                                  gf_dim * 4, (5, 5),
                                  strides=(2, 2),
                                  padding='SAME',
                                  act=None,
                                  W_init=w_init,
                                  name='g/h2/decon2d')
                net_h2 = BatchNormLayer(net_h2,
                                        decay=0.9,
                                        act=tf.nn.relu,
                                        is_train=is_train,
                                        gamma_init=gamma_init,
                                        name='g/h2/batch_norm')

            with tf.name_scope("layer3"):
                net_h3 = DeConv2d(net_h2,
                                  gf_dim * 2, (5, 5),
                                  strides=(2, 2),
                                  padding='SAME',
                                  act=None,
                                  W_init=w_init,
                                  name='g/h3/decon2d')
                net_h3 = BatchNormLayer(net_h3,
                                        decay=0.9,
                                        act=tf.nn.relu,
                                        is_train=is_train,
                                        gamma_init=gamma_init,
                                        name='g/h3/batch_norm')

            with tf.name_scope("layer4"):
                net_h4 = DeConv2d(net_h3,
                                  gf_dim, (5, 5),
                                  strides=(2, 2),
                                  padding='SAME',
                                  act=None,
                                  W_init=w_init,
                                  name='g/h4/decon2d')
                net_h4 = BatchNormLayer(net_h4,
                                        decay=0.9,
                                        act=tf.nn.relu,
                                        is_train=is_train,
                                        gamma_init=gamma_init,
                                        name='g/h4/batch_norm')

            with tf.name_scope("layer5"):
                net_h5 = DeConv2d(net_h4,
                                  c_dim, (5, 5),
                                  strides=(2, 2),
                                  padding='SAME',
                                  act=None,
                                  W_init=w_init,
                                  name='g/h5/decon2d')
                net_h5.outputs = tf.nn.tanh(net_h5.outputs)

        return net_h5
Example #16
def discriminator2(inputs, is_train=True, reuse=False):
    df_dim = 32  # Dimension of discrim filters in first conv layer. [64]
    w_init = tf.glorot_normal_initializer()
    gamma_init = tf.random_normal_initializer(1., 0.02)
    lrelu = lambda x: tf.nn.leaky_relu(x, 0.2)

    with tf.name_scope("DISCRIMINATOR2"):
        with tf.variable_scope("discriminator2", reuse=reuse):

            with tf.name_scope("net_in"):
                net_in = InputLayer(inputs, name='d2/in')

            with tf.name_scope("layer0"):
                net_h0 = Conv2d(net_in,
                                df_dim, (3, 3), (3, 3),
                                act=lrelu,
                                padding='SAME',
                                W_init=w_init,
                                name='d2/h0/conv2d')

            with tf.name_scope("layer1"):
                net_h1 = Conv2d(net_h0,
                                df_dim * 2, (3, 3), (3, 3),
                                act=None,
                                padding='SAME',
                                W_init=w_init,
                                name='d2/h1/conv2d')
                net_h1 = BatchNormLayer(net_h1,
                                        decay=0.9,
                                        act=lrelu,
                                        is_train=is_train,
                                        gamma_init=gamma_init,
                                        name='d2/h1/batch_norm')

            with tf.name_scope("layer2"):
                net_h2 = Conv2d(net_h1,
                                df_dim * 4, (3, 3), (3, 3),
                                act=None,
                                padding='SAME',
                                W_init=w_init,
                                name='d2/h2/conv2d')
                net_h2 = BatchNormLayer(net_h2,
                                        decay=0.9,
                                        act=lrelu,
                                        is_train=is_train,
                                        gamma_init=gamma_init,
                                        name='d2/h2/batch_norm')

            with tf.name_scope("layer3"):
                net_h3 = Conv2d(net_h2,
                                df_dim * 8, (3, 3), (3, 3),
                                act=None,
                                padding='SAME',
                                W_init=w_init,
                                name='d2/h3/conv2d')
                net_h3 = BatchNormLayer(net_h3,
                                        decay=0.9,
                                        act=lrelu,
                                        is_train=is_train,
                                        gamma_init=gamma_init,
                                        name='d2/h3/batch_norm')

            with tf.name_scope("layer4"):
                net_h4 = FlattenLayer(net_h3, name='d2/h4/flatten')
                net_h4 = DenseLayer(net_h4,
                                    n_units=df_dim * 8,
                                    act=tf.identity,
                                    W_init=w_init,
                                    name='d2/h4/lin_sigmoid')

            with tf.name_scope("layer5"):
                net_h5 = FlattenLayer(net_h4, name='d2/h5/flatten')
                net_h5 = DenseLayer(net_h5,
                                    n_units=df_dim * 8,
                                    act=tf.identity,
                                    W_init=w_init,
                                    name='d2/h5/lin_sigmoid')

        #net_h6 = FlattenLayer(net_h5, name='d/h6/flatten')
            with tf.name_scope("layer6"):
                net_h6 = DenseLayer(net_h5,
                                    n_units=2,
                                    act=tf.identity,
                                    W_init=w_init,
                                    name='d2/h6/lin_sigmoid')
                logits2 = net_h6.outputs
                net_h6.outputs = tf.nn.softplus(net_h6.outputs)
        return net_h6, logits2
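
A hypothetical way to wire this discriminator against the generator from Example #15, assuming the TensorLayer 1.x layers used above (InputLayer, Conv2d, DenseLayer, ...) are already imported.

import tensorflow as tf

z = tf.placeholder(tf.float32, [None, 100], name="z")
net_g = generator(z, is_train=True, reuse=False)             # 128x128x1 images
net_d, logits_fake = discriminator2(net_g.outputs, is_train=True, reuse=False)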
Example #17
    def construct_network(self):
        self.word_ids = tf.placeholder(tf.int32, [None, None], name="word_ids")
        self.char_ids = tf.placeholder(tf.int32, [None, None, None],
                                       name="char_ids")
        self.sentence_lengths = tf.placeholder(tf.int32, [None],
                                               name="sentence_lengths")
        self.word_lengths = tf.placeholder(tf.int32, [None, None],
                                           name="word_lengths")
        self.sentence_labels = tf.placeholder(tf.float32, [None],
                                              name="sentence_labels")
        self.word_labels = tf.placeholder(tf.float32, [None, None],
                                          name="word_labels")
        self.word_objective_weights = tf.placeholder(
            tf.float32, [None, None], name="word_objective_weights")
        self.sentence_objective_weights = tf.placeholder(
            tf.float32, [None], name="sentence_objective_weights")
        self.learningrate = tf.placeholder(tf.float32, name="learningrate")
        self.is_training = tf.placeholder(tf.int32, name="is_training")

        self.loss = 0.0
        input_tensor = None
        input_vector_size = 0

        self.initializer = None
        if self.config["initializer"] == "normal":
            self.initializer = tf.random_normal_initializer(mean=0.0,
                                                            stddev=0.1)
        elif self.config["initializer"] == "glorot":
            self.initializer = tf.glorot_uniform_initializer()
        elif self.config["initializer"] == "xavier":
            self.initializer = tf.glorot_normal_initializer()

        zeros_initializer = tf.zeros_initializer()

        self.word_embeddings = tf.get_variable(
            "word_embeddings",
            shape=[len(self.word2id), self.config["word_embedding_size"]],
            initializer=(zeros_initializer if self.config["emb_initial_zero"]
                         == True else self.initializer),
            trainable=(True
                       if self.config["train_embeddings"] == True else False))
        input_tensor = tf.nn.embedding_lookup(self.word_embeddings,
                                              self.word_ids)
        input_vector_size = self.config["word_embedding_size"]

        if self.config["char_embedding_size"] > 0 and self.config[
                "char_recurrent_size"] > 0:
            with tf.variable_scope("chars"), tf.control_dependencies([
                    tf.assert_equal(tf.shape(self.char_ids)[2],
                                    tf.reduce_max(self.word_lengths),
                                    message="Char dimensions don't match")
            ]):
                self.char_embeddings = tf.get_variable(
                    "char_embeddings",
                    shape=[
                        len(self.char2id), self.config["char_embedding_size"]
                    ],
                    initializer=self.initializer,
                    trainable=True)
                char_input_tensor = tf.nn.embedding_lookup(
                    self.char_embeddings, self.char_ids)

                s = tf.shape(char_input_tensor)
                char_input_tensor = tf.reshape(
                    char_input_tensor,
                    shape=[
                        s[0] * s[1], s[2], self.config["char_embedding_size"]
                    ])
                _word_lengths = tf.reshape(self.word_lengths,
                                           shape=[s[0] * s[1]])

                char_lstm_cell_fw = tf.nn.rnn_cell.LSTMCell(
                    self.config["char_recurrent_size"],
                    use_peepholes=self.config["lstm_use_peepholes"],
                    state_is_tuple=True,
                    initializer=self.initializer,
                    reuse=False)
                char_lstm_cell_bw = tf.nn.rnn_cell.LSTMCell(
                    self.config["char_recurrent_size"],
                    use_peepholes=self.config["lstm_use_peepholes"],
                    state_is_tuple=True,
                    initializer=self.initializer,
                    reuse=False)

                char_lstm_outputs = tf.nn.bidirectional_dynamic_rnn(
                    char_lstm_cell_fw,
                    char_lstm_cell_bw,
                    char_input_tensor,
                    sequence_length=_word_lengths,
                    dtype=tf.float32,
                    time_major=False)
                _, ((_, char_output_fw), (_,
                                          char_output_bw)) = char_lstm_outputs
                char_output_tensor = tf.concat(
                    [char_output_fw, char_output_bw], axis=-1)
                char_output_tensor = tf.reshape(
                    char_output_tensor,
                    shape=[s[0], s[1], 2 * self.config["char_recurrent_size"]])
                char_output_vector_size = 2 * self.config["char_recurrent_size"]

                if self.config["lmcost_char_gamma"] > 0.0:
                    self.loss += self.config[
                        "lmcost_char_gamma"] * self.construct_lmcost(
                            char_output_tensor, char_output_tensor,
                            self.sentence_lengths, self.word_ids, "separate",
                            "lmcost_char_separate")
                if self.config["lmcost_joint_char_gamma"] > 0.0:
                    self.loss += self.config[
                        "lmcost_joint_char_gamma"] * self.construct_lmcost(
                            char_output_tensor, char_output_tensor,
                            self.sentence_lengths, self.word_ids, "joint",
                            "lmcost_char_joint")

                if self.config["char_hidden_layer_size"] > 0:
                    char_output_tensor = tf.layers.dense(
                        char_output_tensor,
                        self.config["char_hidden_layer_size"],
                        activation=tf.tanh,
                        kernel_initializer=self.initializer)
                    char_output_vector_size = self.config[
                        "char_hidden_layer_size"]

                if self.config["char_integration_method"] == "concat":
                    input_tensor = tf.concat(
                        [input_tensor, char_output_tensor], axis=-1)
                    input_vector_size += char_output_vector_size
                elif self.config["char_integration_method"] == "none":
                    input_tensor = input_tensor
                else:
                    raise ValueError("Unknown char integration method")

        self.word_representations = input_tensor

        dropout_input = self.config["dropout_input"] * tf.cast(
            self.is_training, tf.float32) + (
                1.0 - tf.cast(self.is_training, tf.float32))
        input_tensor = tf.nn.dropout(input_tensor,
                                     dropout_input,
                                     name="dropout_word")

        word_lstm_cell_fw = tf.nn.rnn_cell.LSTMCell(
            self.config["word_recurrent_size"],
            use_peepholes=self.config["lstm_use_peepholes"],
            state_is_tuple=True,
            initializer=self.initializer,
            reuse=False)
        word_lstm_cell_bw = tf.nn.rnn_cell.LSTMCell(
            self.config["word_recurrent_size"],
            use_peepholes=self.config["lstm_use_peepholes"],
            state_is_tuple=True,
            initializer=self.initializer,
            reuse=False)

        with tf.control_dependencies([
                tf.assert_equal(tf.shape(self.word_ids)[1],
                                tf.reduce_max(self.sentence_lengths),
                                message="Sentence dimensions don't match")
        ]):
            (lstm_outputs_fw, lstm_outputs_bw), ((_, lstm_output_fw), (
                _, lstm_output_bw)) = tf.nn.bidirectional_dynamic_rnn(
                    word_lstm_cell_fw,
                    word_lstm_cell_bw,
                    input_tensor,
                    sequence_length=self.sentence_lengths,
                    dtype=tf.float32,
                    time_major=False)

        dropout_word_lstm = self.config["dropout_word_lstm"] * tf.cast(
            self.is_training, tf.float32) + (
                1.0 - tf.cast(self.is_training, tf.float32))
        lstm_outputs_fw = tf.nn.dropout(
            lstm_outputs_fw,
            dropout_word_lstm,
            noise_shape=tf.convert_to_tensor([
                tf.shape(self.word_ids)[0], 1,
                self.config["word_recurrent_size"]
            ],
                                             dtype=tf.int32))
        lstm_outputs_bw = tf.nn.dropout(
            lstm_outputs_bw,
            dropout_word_lstm,
            noise_shape=tf.convert_to_tensor([
                tf.shape(self.word_ids)[0], 1,
                self.config["word_recurrent_size"]
            ],
                                             dtype=tf.int32))
        lstm_outputs = tf.concat([lstm_outputs_fw, lstm_outputs_bw], -1)

        if self.config["whidden_layer_size"] > 0:
            lstm_outputs = tf.layers.dense(lstm_outputs,
                                           self.config["whidden_layer_size"],
                                           activation=tf.tanh,
                                           kernel_initializer=self.initializer)

        self.lstm_outputs = lstm_outputs

        lstm_output = tf.concat([lstm_output_fw, lstm_output_bw], -1)
        lstm_output = tf.nn.dropout(lstm_output, dropout_word_lstm)

        if self.config["sentence_composition"] == "last":
            processed_tensor = lstm_output
            self.attention_weights_unnormalised = tf.zeros_like(
                self.word_ids, dtype=tf.float32)
        elif self.config["sentence_composition"] == "attention":
            with tf.variable_scope("attention"):
                attention_evidence = tf.layers.dense(
                    lstm_outputs,
                    self.config["attention_evidence_size"],
                    activation=tf.tanh,
                    kernel_initializer=self.initializer)

                attention_weights = tf.layers.dense(
                    attention_evidence,
                    1,
                    activation=None,
                    kernel_initializer=self.initializer)
                attention_weights = tf.reshape(attention_weights,
                                               shape=tf.shape(self.word_ids))

                if self.config["attention_activation"] == "sharp":
                    attention_weights = tf.exp(attention_weights)
                elif self.config["attention_activation"] == "soft":
                    attention_weights = tf.sigmoid(attention_weights)
                elif self.config["attention_activation"] == "linear":
                    pass
                else:
                    raise ValueError("Unknown activation for attention: " +
                                     str(self.config["attention_activation"]))

                word_objective_loss = tf.square(attention_weights -
                                                self.word_labels)
                word_objective_loss = tf.where(
                    tf.sequence_mask(self.sentence_lengths),
                    word_objective_loss, tf.zeros_like(word_objective_loss))
                self.loss += self.config[
                    "word_objective_weight"] * tf.reduce_sum(
                        self.word_objective_weights * word_objective_loss)

                self.attention_weights_unnormalised = attention_weights
                attention_weights = tf.where(
                    tf.sequence_mask(self.sentence_lengths), attention_weights,
                    tf.zeros_like(attention_weights))
                attention_weights = attention_weights / tf.reduce_sum(
                    attention_weights, 1, keep_dims=True)
                processed_tensor = tf.reduce_sum(
                    lstm_outputs * attention_weights[:, :, numpy.newaxis], 1)

        if self.config["hidden_layer_size"] > 0:
            processed_tensor = tf.layers.dense(
                processed_tensor,
                self.config["hidden_layer_size"],
                activation=tf.tanh,
                kernel_initializer=self.initializer)

        self.sentence_scores = tf.layers.dense(
            processed_tensor,
            1,
            activation=tf.sigmoid,
            kernel_initializer=self.initializer,
            name="output_ff")
        self.sentence_scores = tf.reshape(
            self.sentence_scores, shape=[tf.shape(processed_tensor)[0]])

        self.loss += self.config["sentence_objective_weight"] * tf.reduce_sum(
            self.sentence_objective_weights *
            tf.square(self.sentence_scores - self.sentence_labels))

        if self.config["attention_objective_weight"] > 0.0:
            self.loss += self.config["attention_objective_weight"] * \
                (tf.reduce_sum(
                    self.sentence_objective_weights * tf.square(
                        tf.reduce_max(
                            tf.where(
                                tf.sequence_mask(self.sentence_lengths),
                                self.attention_weights_unnormalised,
                                tf.zeros_like(self.attention_weights_unnormalised) - 1e6),
                            axis=-1) - self.sentence_labels))
                +
                tf.reduce_sum(
                    self.sentence_objective_weights * tf.square(
                        tf.reduce_min(
                            tf.where(
                                tf.sequence_mask(self.sentence_lengths),
                                self.attention_weights_unnormalised,
                                tf.zeros_like(self.attention_weights_unnormalised) + 1e6),
                            axis=-1) - 0.0)))

        self.token_scores = [
            tf.where(tf.sequence_mask(self.sentence_lengths),
                     self.attention_weights_unnormalised,
                     tf.zeros_like(self.attention_weights_unnormalised) - 1e6)
        ]

        if self.config["lmcost_lstm_gamma"] > 0.0:
            self.loss += self.config[
                "lmcost_lstm_gamma"] * self.construct_lmcost(
                    lstm_outputs_fw, lstm_outputs_bw, self.sentence_lengths,
                    self.word_ids, "separate", "lmcost_lstm_separate")
        if self.config["lmcost_joint_lstm_gamma"] > 0.0:
            self.loss += self.config[
                "lmcost_joint_lstm_gamma"] * self.construct_lmcost(
                    lstm_outputs_fw, lstm_outputs_bw, self.sentence_lengths,
                    self.word_ids, "joint", "lmcost_lstm_joint")

        self.train_op = self.construct_optimizer(self.config["opt_strategy"],
                                                 self.loss, self.learningrate,
                                                 self.config["clip"])
Example #18
def model_fn(model, features, labels, mode):
    def sum_pooling(embeddings, slots):
        slot_embeddings = []
        for slot in slots:
            slot_embeddings.append(embeddings[_SLOT_2_IDX[slot]])
        if len(slot_embeddings) == 1:
            return slot_embeddings[0]
        return tf.add_n(slot_embeddings)

    global_step = tf.train.get_or_create_global_step()
    num_slot, embed_size = len(_SLOT_2_BUCKET), 8
    xavier_initializer = tf.glorot_normal_initializer()

    flt.feature.FeatureSlot.set_default_bias_initializer(
        tf.zeros_initializer())
    flt.feature.FeatureSlot.set_default_vec_initializer(
        tf.random_uniform_initializer(-0.0078125, 0.0078125))
    flt.feature.FeatureSlot.set_default_bias_optimizer(
        tf.train.FtrlOptimizer(learning_rate=0.01))
    flt.feature.FeatureSlot.set_default_vec_optimizer(
        tf.train.AdagradOptimizer(learning_rate=0.01))

    # deal with input cols
    categorical_embed = []
    num_slot, embed_dim = len(_SLOT_2_BUCKET), 8

    with tf.variable_scope("follower"):
        for slot, bucket_size in _SLOT_2_BUCKET:
            fs = model.add_feature_slot(slot, bucket_size)
            fc = model.add_feature_column(fs)
            categorical_embed.append(fc.add_vector(embed_dim))

    # concatenate all embeddings
    slot_embeddings = categorical_embed
    concat_embedding = tf.concat(slot_embeddings, axis=1)
    output_size = len(slot_embeddings) * embed_dim

    model.freeze_slots(features)

    with tf.variable_scope("follower"):
        fc1_size, fc2_size, fc3_size = 512, 256, 128
        w1 = tf.get_variable('w1',
                             shape=[output_size, fc1_size],
                             dtype=tf.float32,
                             initializer=xavier_initializer)
        b1 = tf.get_variable('b1',
                             shape=[fc1_size],
                             dtype=tf.float32,
                             initializer=tf.zeros_initializer())
        w2 = tf.get_variable('w2',
                             shape=[fc1_size, fc2_size],
                             dtype=tf.float32,
                             initializer=xavier_initializer)
        b2 = tf.get_variable('b2',
                             shape=[fc2_size],
                             dtype=tf.float32,
                             initializer=tf.zeros_initializer())
        w3 = tf.get_variable('w3',
                             shape=[fc2_size, fc3_size],
                             dtype=tf.float32,
                             initializer=xavier_initializer)
        b3 = tf.get_variable('b3',
                             shape=[fc3_size],
                             dtype=tf.float32,
                             initializer=tf.zeros_initializer())

    act1_l = tf.nn.relu(tf.nn.bias_add(tf.matmul(concat_embedding, w1), b1))
    act1_l = tf.layers.batch_normalization(act1_l, training=True)
    act2_l = tf.nn.relu(tf.nn.bias_add(tf.matmul(act1_l, w2), b2))
    act2_l = tf.layers.batch_normalization(act2_l, training=True)
    embedding = tf.nn.relu(tf.nn.bias_add(tf.matmul(act2_l, w3), b3))
    embedding = tf.layers.batch_normalization(embedding, training=True)

    if mode == tf.estimator.ModeKeys.TRAIN:
        embedding_grad = model.send('embedding', embedding, require_grad=True)
        optimizer = tf.train.GradientDescentOptimizer(0.1)
        train_op = model.minimize(optimizer,
                                  embedding,
                                  grad_loss=embedding_grad,
                                  global_step=global_step)
        return model.make_spec(mode,
                               loss=tf.math.reduce_mean(embedding),
                               train_op=train_op)
    elif mode == tf.estimator.ModeKeys.PREDICT:
        return model.make_spec(mode, predictions={'embedding': embedding})