Example #1
    def logits(self) -> tf.Tensor:
        vocabulary_size = len(self.vocabulary)

        encoder_states = self.encoder.temporal_states

        weights = get_variable(
            name="state_to_word_W",
            shape=[encoder_states.shape[2], vocabulary_size + 1],
            initializer=tf.random_uniform_initializer(-0.5, 0.5))

        biases = get_variable(
            name="state_to_word_b",
            shape=[vocabulary_size + 1],
            initializer=tf.zeros_initializer())

        # To multiply 3-D matrix (encoder hidden states) by a 2-D matrix
        # (weights), we use 1-by-1 convolution (similar trick can be found in
        # attention computation)

        encoder_states = tf.expand_dims(encoder_states, 2)
        weights_4d = tf.expand_dims(tf.expand_dims(weights, 0), 0)

        multiplication = tf.nn.conv2d(
            encoder_states, weights_4d, [1, 1, 1, 1], "SAME")
        multiplication_3d = tf.squeeze(multiplication, axis=2)

        biases_3d = tf.expand_dims(tf.expand_dims(biases, 0), 0)

        logits = multiplication_3d + biases_3d
        return tf.transpose(logits, perm=[1, 0, 2])  # time major
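The 1-by-1 convolution comment deserves a quick check: because the kernel covers a single position, a conv2d with a [1, 1, state, vocab] filter is just the same matrix multiplication applied at every time step. A minimal numpy sketch of the equivalence (all sizes assumed for illustration, not taken from the example):

import numpy as np

batch, time, state, vocab = 2, 5, 8, 11
states = np.random.randn(batch, time, state).astype(np.float32)
weights = np.random.randn(state, vocab).astype(np.float32)

# Reference: flatten to 2-D, multiply, restore the time dimension.
reference = (states.reshape(-1, state) @ weights).reshape(batch, time, vocab)

# Convolution view: a [1, 1, state, vocab] kernel sliding over the
# [batch, time, 1, state] "image" computes the same dot products.
conv_like = np.einsum("btus,sv->btuv",
                      states[:, :, None, :], weights)[:, :, 0, :]

assert np.allclose(reference, conv_like, atol=1e-5)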
Example #2
    def __init__(self,
                 name: str,
                 dimension: int,
                 data_id: str,
                 output_shape: int = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint,
                           initializers)
        check_argument_types()

        if dimension <= 0:
            raise ValueError("Input vector dimension must be positive.")
        if output_shape is not None and output_shape <= 0:
            raise ValueError("Output vector dimension must be positive.")

        self.vector = tf.placeholder(
            tf.float32, shape=[None, dimension])
        self.data_id = data_id

        with self.use_scope():
            if output_shape is not None and dimension != output_shape:
                project_w = get_variable(
                    shape=[dimension, output_shape],
                    name="img_init_proj_W")
                project_b = get_variable(
                    name="img_init_b", shape=[output_shape],
                    initializer=tf.zeros_initializer())

                self._encoded = tf.matmul(
                    self.vector, project_w) + project_b
            else:
                self._encoded = self.vector
Example #3
    def output(self) -> tf.Tensor:
        pooled_outputs = []
        for filter_size, num_filters in self.filters:
            with tf.variable_scope("conv-maxpool-%s" % filter_size):
                # Convolution Layer
                filter_shape = [filter_size, self.embedding_size, num_filters]
                w_filter = get_variable(
                    "conv_W", filter_shape,
                    initializer=tf.variance_scaling_initializer(
                        mode="fan_avg", distribution="uniform"))
                b_filter = get_variable(
                    "conv_bias", [num_filters],
                    initializer=tf.zeros_initializer())
                conv = tf.nn.conv1d(
                    self.embedded_inputs,
                    w_filter,
                    stride=1,
                    padding="VALID",
                    name="conv")

                # Apply nonlinearity
                conv_relu = tf.nn.relu(tf.nn.bias_add(conv, b_filter))

                # Max-pooling over the outputs
                pooled = tf.reduce_max(conv_relu, 1)
                pooled_outputs.append(pooled)

        # Combine all the pooled features
        return tf.concat(pooled_outputs, axis=1)
Example #4
    def __init__(self,
                 name: str,
                 input_shape: List[int],
                 output_shape: int,
                 data_id: str,
                 save_checkpoint: Optional[str] = None,
                 load_checkpoint: Optional[str] = None,
                 initializers: InitializerSpecs = None) -> None:
        check_argument_types()
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint,
                           initializers)

        assert len(input_shape) == 3
        if output_shape <= 0:
            raise ValueError("Output vector dimension must be positive.")

        self.data_id = data_id

        with self.use_scope():
            features_shape = [None] + input_shape  # type: ignore
            self.image_features = tf.placeholder(tf.float32,
                                                 shape=features_shape,
                                                 name="image_input")

            self.flat = tf.reduce_mean(self.image_features,
                                       axis=[1, 2],
                                       name="average_image")

            self.project_w = get_variable(
                name="img_init_proj_W",
                shape=[input_shape[2], output_shape],
                initializer=tf.glorot_normal_initializer())
            self.project_b = get_variable(
                name="img_init_b", shape=[output_shape],
                initializer=tf.zeros_initializer())
Example #5
    def get_encoder_projections(self, scope):
        encoder_projections = []
        with tf.variable_scope(scope):
            for i, encoder_tensor in enumerate(self._encoders_tensors):
                encoder_state_size = encoder_tensor.get_shape()[2].value
                encoder_tensor_shape = tf.shape(encoder_tensor)

                proj_matrix = get_variable(
                    "proj_matrix_{}".format(i),
                    [encoder_state_size, self.attention_state_size],
                    initializer=tf.random_normal_initializer(stddev=0.001))

                proj_bias = get_variable("proj_bias_{}".format(i),
                                         shape=[self.attention_state_size],
                                         initializer=tf.zeros_initializer())

                encoder_tensor_2d = tf.reshape(encoder_tensor,
                                               [-1, encoder_state_size])

                projected_2d = tf.matmul(encoder_tensor_2d,
                                         proj_matrix) + proj_bias
                assert_shape(projected_2d, [-1, self.attention_state_size])

                projection = tf.reshape(projected_2d, [
                    encoder_tensor_shape[0], encoder_tensor_shape[1],
                    self.attention_state_size
                ])

                encoder_projections.append(projection)
            return encoder_projections
Example #6
    def output(self) -> tf.Tensor:
        pooled_outputs = []
        for filter_size, num_filters in self.filters:
            with tf.variable_scope("conv-maxpool-%s" % filter_size):
                # Convolution Layer
                filter_shape = [filter_size, self.embedding_size, num_filters]
                w_filter = get_variable(
                    "conv_W", filter_shape,
                    initializer=tf.variance_scaling_initializer(
                        mode="fan_avg", distribution="uniform"))
                b_filter = get_variable(
                    "conv_bias", [num_filters],
                    initializer=tf.zeros_initializer())
                conv = tf.nn.conv1d(
                    self.embedded_inputs,
                    w_filter,
                    stride=1,
                    padding="VALID",
                    name="conv")

                # Apply nonlinearity
                conv_relu = tf.nn.relu(tf.nn.bias_add(conv, b_filter))

                # Max-pooling over the outputs
                pooled = tf.reduce_max(conv_relu, 1)
                pooled_outputs.append(pooled)

        # Combine all the pooled features
        return tf.concat(pooled_outputs, axis=1)
Example #7
    def get_encoder_projections(self, scope: str) -> List[tf.Tensor]:
        encoder_projections = []
        with tf.variable_scope(scope):
            for i, encoder_tensor in enumerate(self._encoders_tensors):
                encoder_state_size = encoder_tensor.get_shape()[2].value
                encoder_tensor_shape = tf.shape(encoder_tensor)

                proj_matrix = get_variable(
                    "proj_matrix_{}".format(i),
                    [encoder_state_size, self.attention_state_size],
                    initializer=tf.random_normal_initializer(stddev=0.001))

                proj_bias = get_variable(
                    "proj_bias_{}".format(i),
                    shape=[self.attention_state_size],
                    initializer=tf.zeros_initializer())

                encoder_tensor_2d = tf.reshape(
                    encoder_tensor, [-1, encoder_state_size])

                projected_2d = tf.matmul(
                    encoder_tensor_2d, proj_matrix) + proj_bias
                assert_shape(projected_2d, [-1, self.attention_state_size])

                projection = tf.reshape(
                    projected_2d, [encoder_tensor_shape[0],
                                   encoder_tensor_shape[1],
                                   self.attention_state_size])

                encoder_projections.append(projection)
            return encoder_projections
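The reshape-matmul-reshape pattern above is the flat counterpart of the 1-by-1 convolution trick from Example #1: merging batch and time into one axis lets a single matmul project every encoder state at once, after which the reshape restores the temporal structure. A short numpy sketch under assumed sizes:

import numpy as np

batch, time, state_size, attn_size = 2, 7, 6, 4
encoder_tensor = np.random.randn(batch, time, state_size)
proj_matrix = np.random.randn(state_size, attn_size)
proj_bias = np.zeros(attn_size)

# One matmul projects all batch * time positions at once.
projected_2d = encoder_tensor.reshape(-1, state_size) @ proj_matrix + proj_bias
projection = projected_2d.reshape(batch, time, attn_size)
print(projection.shape)  # (2, 7, 4)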
Example #8
    def logits(self) -> tf.Tensor:
        vocabulary_size = len(self.vocabulary)

        encoder_states = self.encoder.temporal_states

        weights = get_variable(
            name="state_to_word_W",
            shape=[encoder_states.shape[2], vocabulary_size + 1],
            initializer=tf.random_uniform_initializer(-0.5, 0.5))

        biases = get_variable(name="state_to_word_b",
                              shape=[vocabulary_size + 1],
                              initializer=tf.zeros_initializer())

        # To multiply 3-D matrix (encoder hidden states) by a 2-D matrix
        # (weights), we use 1-by-1 convolution (similar trick can be found in
        # attention computation)

        encoder_states = tf.expand_dims(encoder_states, 2)
        weights_4d = tf.expand_dims(tf.expand_dims(weights, 0), 0)

        multiplication = tf.nn.conv2d(encoder_states, weights_4d, [1, 1, 1, 1],
                                      "SAME")
        multiplication_3d = tf.squeeze(multiplication, axis=[2])

        biases_3d = tf.expand_dims(tf.expand_dims(biases, 0), 0)

        logits = multiplication_3d + biases_3d
        return tf.transpose(logits, perm=[1, 0, 2])  # time major
Example #9
def highway(inputs, activation=tf.nn.relu, scope="HighwayNetwork"):
    """Create a single highway layer.

    y = H(x, Wh) * T(x, Wt) + x * C(x, Wc)

    where:

    C(x, Wc) = 1 - T(x, Wt)

    Arguments:
        inputs: A tensor or list of tensors. The tensors should be 2-D,
                with the same size in the first dimension (batch size).
        activation: Activation function of the linear part of the formula
                H(x, Wh).
        scope: The name of the scope used for the variables.

    Returns:
        A tensor of shape tf.shape(inputs)
    """
    with tf.variable_scope(scope):
        if isinstance(inputs, list):
            # if there is a list of tensor on the input, concatenate along
            # the last dimension and project.
            inputs = tf.concat(inputs, axis=-1)

        # pylint: disable=no-member
        vec_size = inputs.get_shape().as_list()[-1]

        # pylint: disable=invalid-name
        W_shape = [vec_size, vec_size]
        b_shape = [vec_size]

        W_H = get_variable("weight_H",
                           shape=W_shape,
                           initializer=tf.glorot_normal_initializer())
        b_H = get_variable("bias_H",
                           shape=b_shape,
                           initializer=tf.constant_initializer(-1.0))

        W_T = get_variable("weight_T",
                           shape=W_shape,
                           initializer=tf.glorot_normal_initializer())
        b_T = get_variable("bias_T",
                           shape=b_shape,
                           initializer=tf.constant_initializer(-1.0))

        T = tf.sigmoid(tf.add(tf.matmul(inputs, W_T), b_T),
                       name="transform_gate")
        H = activation(tf.add(tf.matmul(inputs, W_H), b_H), name="activation")
        C = tf.subtract(1.0, T, name="carry_gate")

        y = tf.add(tf.multiply(H, T), tf.multiply(inputs, C), "y")
        return y
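To trace the docstring's formula, here is a numpy sketch of one highway layer with toy sizes and weights (all assumed). Note how the -1.0 bias initialization keeps the transform gate mostly closed at first, so the layer starts close to the identity:

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

x = np.random.randn(3, 4)   # toy batch of 2-D inputs (assumed sizes)
W_H = np.random.randn(4, 4)
W_T = np.random.randn(4, 4)
b = np.full(4, -1.0)        # negative bias favors the carry gate

T = sigmoid(x @ W_T + b)          # transform gate T(x, Wt)
H = np.maximum(x @ W_H + b, 0.0)  # H(x, Wh) with the default ReLU
y = H * T + x * (1.0 - T)         # carry gate C = 1 - T

assert y.shape == x.shape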
Example #10
def highway(inputs, activation=tf.nn.relu, scope="HighwayNetwork"):
    """Create a single highway layer.

    y = H(x, Wh) * T(x, Wt) + x * C(x, Wc)

    where:

    C(x, Wc) = 1 - T(x, Wt)

    Arguments:
        inputs: A tensor or list of tensors. The tensors should be 2-D,
                with the same size in the first dimension (batch size).
        activation: Activation function of the linear part of the formula
                H(x, Wh).
        scope: The name of the scope used for the variables.

    Returns:
        A tensor of shape tf.shape(inputs)
    """
    with tf.variable_scope(scope):
        if isinstance(inputs, list):
            # if there is a list of tensor on the input, concatenate along
            # the last dimension and project.
            inputs = tf.concat(inputs, axis=-1)

        vec_size = inputs.get_shape().as_list()[-1]

        # pylint: disable=invalid-name
        W_shape = [vec_size, vec_size]
        b_shape = [vec_size]

        W_H = get_variable("weight_H", shape=W_shape)
        b_H = get_variable("bias_H", shape=b_shape,
                           initializer=tf.constant_initializer(-1.0))

        W_T = get_variable("weight_T", shape=W_shape)
        b_T = get_variable("bias_T", shape=b_shape,
                           initializer=tf.constant_initializer(-1.0))

        T = tf.sigmoid(
            tf.add(tf.matmul(inputs, W_T), b_T),
            name="transform_gate")
        H = activation(
            tf.add(tf.matmul(inputs, W_H), b_H),
            name="activation")
        C = tf.subtract(1.0, T, name="carry_gate")

        y = tf.add(
            tf.multiply(H, T),
            tf.multiply(inputs, C),
            "y")
        return y
Example #11
    def order_embeddings(self) -> tf.Tensor:
        # initialization in the same way as in the original CS2S implementation
        with tf.variable_scope("input_projection"):
            return get_variable(
                "order_embeddings",
                [self.max_input_length, self.input_sequence.embedding_sizes[0]],
                initializer=tf.glorot_normal_initializer())
Example #12
    def _vector_logit(self,
                      projected_decoder_state: tf.Tensor,
                      vector_value: tf.Tensor,
                      scope: str) -> Tuple[tf.Tensor, tf.Tensor]:
        """Get logit for a single vector, e.g., sentinel vector."""
        assert_shape(projected_decoder_state, [-1, 1, -1])
        assert_shape(vector_value, [-1, -1])

        with tf.variable_scope("{}_logit".format(scope)):
            vector_bias = get_variable(
                "vector_bias", [],
                initializer=tf.zeros_initializer())

            proj_vector_for_logit = tf.expand_dims(
                tf.layers.dense(vector_value, self.attention_state_size,
                                name="vector_projection"), 1)

            if self._share_projections:
                proj_vector_for_ctx = proj_vector_for_logit
            else:
                proj_vector_for_ctx = tf.expand_dims(
                    tf.layers.dense(vector_value, self.attention_state_size,
                                    name="vector_ctx_proj"), 1)

            vector_logit = tf.reduce_sum(
                self.attn_v
                * tf.tanh(projected_decoder_state + proj_vector_for_logit),
                [2]) + vector_bias
            assert_shape(vector_logit, [-1, 1])
            return proj_vector_for_ctx, vector_logit
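A shape sketch of the score computed above (sizes assumed): the Bahdanau-style energy collapses the attention dimension into a single scalar per batch element, matching the [-1, 1] assertion:

import numpy as np

batch, attn_size = 3, 4
projected_decoder_state = np.random.randn(batch, 1, attn_size)
proj_vector_for_logit = np.random.randn(batch, 1, attn_size)
attn_v = np.random.randn(1, 1, attn_size)
vector_bias = 0.0

# Sum over the attention dimension yields one logit per batch item.
vector_logit = (attn_v * np.tanh(
    projected_decoder_state + proj_vector_for_logit)).sum(axis=2) + vector_bias
print(vector_logit.shape)  # (3, 1)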
Example #13
    def decoding_b(self) -> Optional[tf.Variable]:
        if self.tie_embeddings:
            return tf.zeros(len(self.vocabulary))

        with tf.name_scope("output_projection"):
            return get_variable("state_to_word_b", [len(self.vocabulary)],
                                initializer=tf.zeros_initializer())
Example #14
    def _vector_logit(self, projected_decoder_state: tf.Tensor,
                      vector_value: tf.Tensor, scope: str
                      ) -> Tuple[tf.Tensor, tf.Tensor]:
        """Get logit for a single vector, e.g., sentinel vector."""
        assert_shape(projected_decoder_state, [-1, 1, -1])
        assert_shape(vector_value, [-1, -1])

        with tf.variable_scope("{}_logit".format(scope)):
            vector_bias = get_variable("vector_bias", [],
                                       initializer=tf.zeros_initializer())

            proj_vector_for_logit = tf.expand_dims(
                tf.layers.dense(vector_value,
                                self.attention_state_size,
                                name="vector_projection"), 1)

            if self._share_projections:
                proj_vector_for_ctx = proj_vector_for_logit
            else:
                proj_vector_for_ctx = tf.expand_dims(
                    tf.layers.dense(vector_value,
                                    self.attention_state_size,
                                    name="vector_ctx_proj"), 1)

            vector_logit = tf.reduce_sum(
                self.attn_v *
                tf.tanh(projected_decoder_state + proj_vector_for_logit),
                [2]) + vector_bias
            assert_shape(vector_logit, [-1, 1])
            return proj_vector_for_ctx, vector_logit
Example #15
    def encoder_attn_biases(self) -> List[tf.Variable]:
        return [
            get_variable(name="attn_bias_{}".format(i),
                         shape=[],
                         initializer=tf.zeros_initializer())
            for i in range(len(self._encoders_tensors))
        ]
Example #16
    def embedded_inputs(self) -> tf.Tensor:
        with tf.variable_scope("input_projection"):
            embedding_matrix = get_variable(
                "word_embeddings", [len(self.vocabulary), self.embedding_size],
                initializer=tf.glorot_uniform_initializer())
            return dropout(
                tf.nn.embedding_lookup(embedding_matrix, self.inputs),
                self.dropout_keep_prob, self.train_mode)
Example #17
    def __init__(self,
                 name: str,
                 encoders: List[Attendable],
                 attention_state_size: int,
                 share_attn_projections: bool = False,
                 use_sentinels: bool = False,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        check_argument_types()
        MultiAttention.__init__(self,
                                name=name,
                                attention_state_size=attention_state_size,
                                share_attn_projections=share_attn_projections,
                                use_sentinels=use_sentinels,
                                reuse=reuse,
                                save_checkpoint=save_checkpoint,
                                load_checkpoint=load_checkpoint,
                                initializers=initializers)
        self._encoders = encoders

        # pylint: disable=protected-access
        self._encoders_tensors = [
            get_attention_states(e) for e in self._encoders
        ]
        self._encoders_masks = [get_attention_mask(e) for e in self._encoders]
        # pylint: enable=protected-access

        for e_m in self._encoders_masks:
            assert_shape(e_m, [-1, -1])

        for e_t in self._encoders_tensors:
            assert_shape(e_t, [-1, -1, -1])

        with self.use_scope():
            self.encoder_projections_for_logits = \
                self.get_encoder_projections("logits_projections")

            self.encoder_attn_biases = [
                get_variable(name="attn_bias_{}".format(i),
                             shape=[],
                             initializer=tf.zeros_initializer())
                for i in range(len(self._encoders_tensors))
            ]

            if self._share_projections:
                self.encoder_projections_for_ctx = \
                    self.encoder_projections_for_logits
            else:
                self.encoder_projections_for_ctx = \
                    self.get_encoder_projections("context_projections")

            if self._use_sentinels:
                self._encoders_masks.append(
                    tf.ones([tf.shape(self._encoders_masks[0])[0], 1]))

            self.masks_concat = tf.concat(self._encoders_masks, 1)
Example #18
    def decoding_b(self) -> Optional[tf.Variable]:
        if self.tie_embeddings:
            return tf.zeros(len(self.vocabulary))

        with tf.name_scope("output_projection"):
            return get_variable(
                "state_to_word_b",
                [len(self.vocabulary)],
                initializer=tf.zeros_initializer())
Example #19
    def _residual_conv(self, input_signals: tf.Tensor, name: str):
        with tf.variable_scope(name):
            # Initialized as described in the paper.
            # Note: this should be equivalent to tf.glorot_normal_initializer
            init_deviat = np.sqrt(4 / self.conv_features)
            convolution_filters = get_variable(
                "convolution_filters",
                [self.kernel_width, self.conv_features,
                 2 * self.conv_features],
                initializer=tf.random_normal_initializer(stddev=init_deviat))

            bias = get_variable(
                name="conv_bias",
                shape=[2 * self.conv_features],
                initializer=tf.zeros_initializer())

            conv = (tf.nn.conv1d(input_signals, convolution_filters, 1, "SAME")
                    + bias)

            return glu(conv) + input_signals
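The glu import is not shown in this snippet, so here is a sketch with the standard gated linear unit written out (sizes assumed): the convolution doubles the channel count, and GLU halves it again by gating one half with the sigmoid of the other, which is what makes the residual addition line up:

import numpy as np

def glu(x):
    # Standard gated linear unit: split channels, gate one half.
    linear, gate = np.split(x, 2, axis=-1)
    return linear * (1.0 / (1.0 + np.exp(-gate)))

batch, time, conv_features = 2, 5, 8
conv = np.random.randn(batch, time, 2 * conv_features)
input_signals = np.random.randn(batch, time, conv_features)
print((glu(conv) + input_signals).shape)  # (2, 5, 8)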
Example #20
    def modality_matrix(self) -> tf.Tensor:
        """Create an embedding matrix for varyining target modalities.

        Used to embed different target space modalities in the tensor2tensor
        models (e.g. during the zero-shot translation).
        """
        emb_size = self.input_sequence.temporal_states.shape.as_list()[-1]
        return get_variable(name="target_modality_embedding_matrix",
                            shape=[32, emb_size],
                            dtype=tf.float32,
                            initializer=tf.glorot_uniform_initializer())
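For illustration, a hypothetical lookup into such a matrix (the id and sizes are assumed): each target modality is an integer id below 32 that selects one embedding row, which the models would consume through an embedding lookup:

import numpy as np

emb_size = 6
modality_matrix = np.random.randn(32, emb_size)
modality_id = 3  # hypothetical id for one target modality
print(modality_matrix[modality_id].shape)  # (6,)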
Example #21
    def embedded_inputs(self) -> tf.Tensor:
        with tf.variable_scope("input_projection"):
            embedding_matrix = get_variable(
                "word_embeddings",
                [len(self.vocabulary), self.embedding_size],
                initializer=tf.variance_scaling_initializer(
                    mode="fan_avg", distribution="uniform"))
            return dropout(
                tf.nn.embedding_lookup(embedding_matrix, self.inputs),
                self.dropout_keep_prob,
                self.train_mode)
Example #22
    def embedding_matrix(self) -> tf.Variable:
        """Variables and operations for embedding of input words.

        If we are reusing word embeddings, this function takes the embedding
        matrix from the first encoder
        """
        if self.embeddings_source is not None:
            return self.embeddings_source.embedding_matrix

        return get_variable(name="word_embeddings",
                            shape=[len(self.vocabulary), self.embedding_size],
                            initializer=tf.glorot_uniform_initializer())
Example #23
    def cnn_encoded(self) -> tf.Tensor:
        """1D convolution with max-pool that processing characters."""
        dropped_inputs = dropout(self.input_sequence.temporal_states,
                                 self.dropout_keep_prob, self.train_mode)

        pooled_outputs = []
        for filter_size, num_filters in self.filters:
            with tf.variable_scope("conv-maxpool-%s" % filter_size):
                filter_shape = [filter_size, self.input_sequence.dimension,
                                num_filters]
                w_filter = get_variable(
                    "conv_W", filter_shape,
                    initializer=tf.variance_scaling_initializer(
                        mode="fan_avg", distribution="uniform"))
                b_filter = get_variable(
                    "conv_bias", [num_filters],
                    initializer=tf.zeros_initializer())
                conv = tf.nn.conv1d(
                    dropped_inputs,
                    w_filter,
                    stride=1,
                    padding="SAME",
                    name="conv")

                # Apply nonlinearity
                conv_relu = tf.nn.relu(tf.nn.bias_add(conv, b_filter))

                # Max-pooling over the output segments
                expanded_conv_relu = tf.expand_dims(conv_relu, -1)
                pooled = tf.nn.max_pool(
                    expanded_conv_relu,
                    ksize=[1, self.segment_size, 1, 1],
                    strides=[1, self.segment_size, 1, 1],
                    padding="SAME",
                    name="maxpool")
                pooled_outputs.append(pooled)

        # Combine all the pooled features
        concat = tf.concat(pooled_outputs, axis=2)
        return tf.squeeze(concat, [3])
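A quick shape sketch of the pooling above (sizes assumed, with time divisible by segment_size for simplicity): the "SAME" convolution keeps the time dimension, and max-pooling with stride segment_size leaves one vector per segment and filter:

import numpy as np

batch, time, num_filters, segment_size = 2, 12, 5, 4
conv_relu = np.random.randn(batch, time, num_filters)

# Group time steps into segments and take the maximum of each.
pooled = conv_relu.reshape(
    batch, time // segment_size, segment_size, num_filters).max(axis=2)
print(pooled.shape)  # (2, 3, 5)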
Example #24
    def __init__(self,
                 name: str,
                 dimension: int,
                 data_id: str,
                 output_shape: int = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        """Instantiate StatefulFiller.

        Args:
            name: Name of the model part.
            dimension: Dimensionality of the input.
            data_id: Series containing the numpy objects.
            output_shape: Dimension of optional state projection.
        """
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint,
                           initializers)
        check_argument_types()

        if dimension <= 0:
            raise ValueError("Input vector dimension must be positive.")
        if output_shape is not None and output_shape <= 0:
            raise ValueError("Output vector dimension must be positive.")

        self.vector = tf.placeholder(tf.float32, shape=[None, dimension])
        self.data_id = data_id

        with self.use_scope():
            if output_shape is not None and dimension != output_shape:
                project_w = get_variable(shape=[dimension, output_shape],
                                         name="img_init_proj_W")
                project_b = get_variable(name="img_init_b",
                                         shape=[output_shape],
                                         initializer=tf.zeros_initializer())

                self._encoded = tf.matmul(self.vector, project_w) + project_b
            else:
                self._encoded = self.vector
Example #25
    def modality_matrix(self) -> tf.Tensor:
        """Create an embedding matrix for varyining target modalities.

        Used to embed different target space modalities in the tensor2tensor
        models (e.g. during the zero-shot translation).
        """
        emb_size = self.input_sequence.temporal_states.shape.as_list()[-1]
        return get_variable(
            name="target_modality_embedding_matrix",
            shape=[32, emb_size],
            dtype=tf.float32,
            initializer=tf.variance_scaling_initializer(
                mode="fan_avg", distribution="uniform"))
Example #26
    def embedding_matrices(self) -> List[tf.Tensor]:
        """Return a list of embedding matrices for each factor."""

        # Note: Embedding matrices are numbered rather than named by the data
        # id so the data_id string does not need to be the same across
        # experiments

        return [
            get_variable(
                name="embedding_matrix_{}".format(i),
                shape=[vocab_size, emb_size],
                initializer=tf.glorot_uniform_initializer())
            for i, (data_id, vocab_size, emb_size) in enumerate(zip(
                self.data_ids, self.vocabulary_sizes, self.embedding_sizes))]
Example #27
    def embedding_matrix(self) -> tf.Variable:
        """Variables and operations for embedding of input words.

        If we are reusing word embeddings, this function takes the embedding
        matrix from the first encoder
        """
        if self.embeddings_source is not None:
            return self.embeddings_source.embedding_matrix

        assert self.embedding_size is not None

        return get_variable(
            name="word_embeddings",
            shape=[len(self.vocabulary), self.embedding_size])
Example #28
    def decoding_w(self) -> tf.Variable:
        if (self.tie_embeddings
                and self.embedding_size != self.output_dimension):
            raise ValueError(
                "`embedding_size` must be equal to the output projection "
                "size when using the `tie_embeddings` option")

        with tf.name_scope("output_projection"):
            if self.tie_embeddings:
                return tf.transpose(self.embedding_matrix)

            return get_variable(
                "state_to_word_W",
                [self.output_dimension, len(self.vocabulary)],
                initializer=tf.glorot_uniform_initializer())
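The tie_embeddings branch is worth spelling out (sizes assumed): the [vocab, emb] embedding matrix is reused, transposed, as the [emb, vocab] output projection, which is exactly why embedding_size must equal the output dimension:

import numpy as np

vocab_size, embedding_size = 10, 6
embedding_matrix = np.random.randn(vocab_size, embedding_size)

# The transposed embedding matrix doubles as the output projection.
state_to_word_W = embedding_matrix.T
print(state_to_word_W.shape)  # (6, 10)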
Example #29
    def decoding_w(self) -> tf.Variable:
        if (self.tie_embeddings
                and self.embedding_size != self.output_dimension):
            raise ValueError(
                "`embedding_size` must be equal to the output projection "
                "size when using the `tie_embeddings` option")

        with tf.name_scope("output_projection"):
            if self.tie_embeddings:
                return tf.transpose(self.embedding_matrix)

            return get_variable(
                "state_to_word_W",
                [self.output_dimension, len(self.vocabulary)],
                initializer=tf.random_uniform_initializer(-0.5, 0.5))
Example #30
    def embedding_matrices(self) -> List[tf.Tensor]:
        """Return a list of embedding matrices for each factor."""

        # Note: Embedding matrices are numbered rather than named by the data
        # id so the data_id string does not need to be the same across
        # experiments

        if self.embeddings_source is not None:
            return self.embeddings_source.embedding_matrices

        return [
            get_variable(
                name="embedding_matrix_{}".format(i),
                shape=[vocab_size, emb_size],
                trainable=self.trainable)
            for i, (data_id, vocab_size, emb_size) in enumerate(zip(
                self.data_ids, self.vocabulary_sizes, self.embedding_sizes))]
Example #31
    def embedding_matrices(self) -> List[tf.Tensor]:
        """Return a list of embedding matrices for each factor."""

        # Note: Embedding matrices are numbered rather than named by the data
        # id so the data_id string does not need to be the same across
        # experiments

        if self.embeddings_source is not None:
            return self.embeddings_source.embedding_matrices

        return [
            get_variable(name="embedding_matrix_{}".format(i),
                         shape=[vocab_size, emb_size],
                         trainable=self.trainable)
            for i, (data_id, vocab_size, emb_size) in enumerate(
                zip(self.data_ids, self.vocabulary_sizes,
                    self.embedding_sizes))
        ]
Example #32
    def __init__(self,
                 name: str,
                 attention_state_size: int,
                 share_attn_projections: bool = False,
                 use_sentinels: bool = False,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        BaseAttention.__init__(self, name, save_checkpoint, load_checkpoint,
                               initializers)
        self.attentions_in_time = []  # type: List[tf.Tensor]
        self.attention_state_size = attention_state_size
        self._share_projections = share_attn_projections
        self._use_sentinels = use_sentinels

        self.att_scope_name = "attention_{}".format(name)

        with self.use_scope():
            self.attn_v = get_variable(
                "attn_v", [1, 1, self.attention_state_size],
                initializer=tf.random_normal_initializer(stddev=0.001))
Example #33
    def key_projection_matrix(self) -> tf.Variable:
        return get_variable(
            name="attn_key_projection",
            # TODO this is not correct
            shape=[self.context_vector_size, self.state_size])
Example #34
    def query_projection_matrix(self) -> tf.Variable:
        with tf.variable_scope("Attention"):
            return get_variable(name="attn_query_projection",
                                shape=[self.query_state_size, self.state_size])
Example #35
    def query_projection_matrix(self) -> tf.Variable:
        with tf.variable_scope("Attention"):
            return get_variable(
                name="attn_query_projection",
                shape=[self.query_state_size, self.state_size])
Example #36
    def coverage_weights(self) -> tf.Variable:
        return get_variable("coverage_matrix", [1, 1, 1, self.state_size])
Example #37
    def decoding_residual_w(self) -> tf.Variable:
        input_dim = self.encoder.input_sequence.dimension
        return get_variable(name="emb_to_word_W",
                            shape=[input_dim, len(self.vocabulary)],
                            initializer=tf.glorot_normal_initializer())
Example #38
    def decoding_w(self) -> tf.Variable:
        return get_variable(name="state_to_word_W",
                            shape=[self.rnn_size,
                                   len(self.vocabulary)])
Example #39
    def bias_term(self) -> tf.Variable:
        return get_variable(name="attn_bias",
                            shape=[],
                            initializer=tf.zeros_initializer())
Example #40
    def encoder_attn_biases(self) -> List[tf.Variable]:
        return [get_variable(name="attn_bias_{}".format(i), shape=[],
                             initializer=tf.zeros_initializer())
                for i in range(len(self._encoders_tensors))]
Example #41
    def key_projection_matrix(self) -> tf.Variable:
        return get_variable(
            name="attn_key_projection",
            # TODO this is not correct
            shape=[self.context_vector_size, self.state_size])
Example #42
    def fertility_weights(self) -> tf.Variable:
        return get_variable(
            "fertility_matrix", [1, 1, self.context_vector_size])
Example #43
    def similarity_bias_vector(self) -> tf.Variable:
        return get_variable(
            name="attn_similarity_v",
            shape=[self.state_size])
Example #44
    def attn_v(self) -> tf.Tensor:
        return get_variable(
            "attn_v", [1, 1, self.attention_state_size],
            initializer=tf.random_normal_initializer(stddev=0.001))
Example #45
    def projection_bias_vector(self) -> tf.Variable:
        return get_variable(
            name="attn_projection_bias", shape=[self.state_size],
            initializer=tf.zeros_initializer())
Example #46
    def similarity_bias_vector(self) -> tf.Variable:
        return get_variable(name="attn_similarity_v", shape=[self.state_size])
Example #47
    def projection_bias_vector(self) -> tf.Variable:
        return get_variable(name="attn_projection_bias",
                            shape=[self.state_size],
                            initializer=tf.zeros_initializer())
Example #48
    def decoding_w(self) -> tf.Variable:
        return get_variable(name="state_to_word_W",
                            shape=[self.rnn_size,
                                   len(self.vocabulary)],
                            initializer=tf.glorot_normal_initializer())
Example #49
    def decoding_b(self) -> tf.Variable:
        return get_variable(name="state_to_word_b",
                            shape=[len(self.vocabulary)],
                            initializer=tf.zeros_initializer())
Example #50
    def bias_term(self) -> tf.Variable:
        return get_variable(
            name="attn_bias", shape=[],
            initializer=tf.zeros_initializer())