    def __init__(self, model_config, mode):
        """Create projection."""

        # Read each hyperparameter from the model config, falling back to the
        # given default, log the resolved value, and store it as an attribute.
        def _get_params(varname, default_value=None):
            value = model_config[
                varname] if varname in model_config else default_value
            default = "" if varname in model_config else " (default)"
            logging.info("%s = %s%s", varname, value, default)
            setattr(self, varname, value)

        self.mode = mode
        _get_params("feature_size")
        _get_params("max_seq_len", 0)
        _get_params("add_eos_tag", False)
        _get_params("add_bos_tag", False)
        _get_params("hashtype", "murmur")
        _get_params("split_on_space", True)
        _get_params("token_separators", "")
        _get_params("vocabulary", "")
        _get_params("quantize")
        _get_params("word_novelty_bits", 0)
        _get_params("doc_size_levels", 0)
        self.distortion_probability = 0.0
        if mode == base_layers.TRAIN:
            _get_params("distortion_probability", 0.0)
        parameters = base_layers.Parameters(mode, self.quantize)
        super(ProjectionLayer, self).__init__(parameters=parameters)
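A minimal usage sketch for the projection layer above, assuming ProjectionLayer and the base_layers module from the snippet are importable; the config keys mirror the _get_params calls and the values are purely illustrative, not taken from the original project.

# Hypothetical hyperparameters; every key corresponds to a _get_params call
# in ProjectionLayer.__init__, and unlisted keys fall back to the defaults.
model_config = {
    "feature_size": 512,
    "max_seq_len": 128,
    "add_eos_tag": True,
    "add_bos_tag": False,
    "hashtype": "murmur",
    "split_on_space": True,
    "vocabulary": "",
    "quantize": False,
    "distortion_probability": 0.25,  # only read when mode == base_layers.TRAIN
}
projection = ProjectionLayer(model_config, base_layers.TRAIN)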
Example #2
    def __init__(self, config, mode):
        super(Encoder, self).__init__()

        def _get_params(varname, default_value=None):
            value = config[varname] if varname in config else default_value
            default = "" if varname in config else " (default)"
            logging.info("%s = %s%s", varname, value, default)
            setattr(self, varname, value)

        _get_params("labels")
        _get_params("quantize", True)
        _get_params("embedding_regularizer_scale", 35e-3)
        _get_params("embedding_size", 64)
        _get_params("unigram_channels", 0)
        _get_params("bigram_channels", 0)
        _get_params("trigram_channels", 0)
        _get_params("fourgram_channels", 0)
        _get_params("fivegram_channels", 0)
        _get_params("skip1bigram_channels", 0)
        _get_params("skip2bigram_channels", 0)
        _get_params("network_regularizer_scale", 1e-4)
        _get_params("keep_prob", 0.5)
        self.num_classes = len(self.labels)

        self.parameters = base_layers.Parameters(
            mode,
            quantize=self.quantize,
            regularizer_scale=self.embedding_regularizer_scale)
        self.values_fc = dense_layers.BaseQDenseVarLen(
            units=self.embedding_size, rank=3, parameters=self.parameters)
        self.attention_fc = dense_layers.BaseQDenseVarLen(
            units=self.embedding_size, rank=3, parameters=self.parameters)
        self.dropout = tf.keras.layers.Dropout(rate=(1 - self.keep_prob))

        self.parameters = copy.copy(self.parameters)
        self.parameters.regularizer_scale = self.network_regularizer_scale
        self.attention_pool_layers = []
        self._add_attention_pool_layer(self.unigram_channels, 1)
        self._add_attention_pool_layer(self.bigram_channels, 2)
        self._add_attention_pool_layer(self.trigram_channels, 3)
        self._add_attention_pool_layer(self.fourgram_channels, 4)
        self._add_attention_pool_layer(self.fivegram_channels, 5)
        self._add_attention_pool_layer(self.skip1bigram_channels, None, 1)
        self._add_attention_pool_layer(self.skip2bigram_channels, None, 2)

        self.concat_quantizer = quantization_layers.ConcatQuantization(
            axis=1, parameters=self.parameters)
        self.final_fc = dense_layers.BaseQDense(units=self.num_classes,
                                                rank=2,
                                                parameters=self.parameters,
                                                activation=None)
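A minimal construction sketch for the n-gram attention-pooling encoder above, assuming this Encoder class and base_layers are importable; the keys follow the _get_params calls and the values are illustrative only.

# Hypothetical config; *_channels keys not listed here default to 0 and
# "quantize" defaults to True, per the _get_params calls above.
config = {
    "labels": ["negative", "neutral", "positive"],
    "embedding_size": 64,
    "bigram_channels": 64,
    "trigram_channels": 64,
    "skip1bigram_channels": 64,
    "keep_prob": 0.5,
}
encoder = Encoder(config, base_layers.TRAIN)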
Example #3
    def __init__(self, config, mode, **kwargs):
        super(Encoder, self).__init__(**kwargs)

        def _get_params(varname, default_value=None):
            value = config[varname] if varname in config else default_value
            default = "" if varname in config else " (default)"
            logging.info("%s = %s%s", varname, value, default)
            setattr(self, varname, value)

        _get_params("projection_bottleneck_size")
        _get_params("qrnn_state_size")
        _get_params("qrnn_kernel_width", 3)
        _get_params("qrnn_zoneout_probability")
        _get_params("number_qrnn_layers")
        _get_params("labels", [])
        _get_params("regularizer_scale")
        _get_params("quantize")

        self.num_classes = len(self.labels)
        self.parameters = base_layers.Parameters(
            mode,
            quantize=self.quantize,
            regularizer_scale=self.regularizer_scale)

        self.bottleneck_layer = dense_layers.BaseQDenseVarLen(
            units=self.projection_bottleneck_size,
            rank=3,
            parameters=self.parameters)

        self.qrnn_stack = qrnn_layers.QRNNBidirectionalStack(
            parameters=self.parameters,
            zoneout_probability=self.qrnn_zoneout_probability,
            kwidth=self.qrnn_kernel_width,
            state_size=self.qrnn_state_size,
            num_layers=self.number_qrnn_layers)

        self.attention_pool = misc_layers.AttentionPooling(
            parameters=self.parameters)

        if self.num_classes:
            self.final_fc = dense_layers.BaseQDense(units=self.num_classes,
                                                    rank=2,
                                                    parameters=self.parameters,
                                                    activation=None)
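A construction sketch for the QRNN-based encoder above, under the same assumptions (the Encoder class and base_layers are in scope); values are illustrative.

config = {
    "projection_bottleneck_size": 64,
    "qrnn_state_size": 128,
    "qrnn_kernel_width": 3,
    "qrnn_zoneout_probability": 0.5,
    "number_qrnn_layers": 2,
    "labels": ["spam", "not_spam"],
    "regularizer_scale": 1e-4,
    "quantize": True,
}
encoder = Encoder(config, base_layers.TRAIN)
# final_fc is only created because labels is non-empty (num_classes > 0).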
Example #4
    def __init__(self, model_config, mode):
        """Create projection."""
        def _get_params(varname, default_value=None):
            value = model_config[
                varname] if varname in model_config else default_value
            default = "" if varname in model_config else " (default)"
            logging.info("%s = %s%s", varname, value, default)
            setattr(self, varname, value)

        self.mode = mode
        _get_params("feature_size")
        _get_params("add_eos_tag", False)
        _get_params("add_bos_tag", False)
        _get_params("vocabulary", "")
        _get_params("quantize")
        self.distortion_probability = 0.0
        if mode == base_layers.TRAIN:
            _get_params("distortion_probability", 0.0)
        parameters = base_layers.Parameters(mode, self.quantize)
        super(ProjectionLayerPreSegmented,
              self).__init__(parameters=parameters)
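A minimal sketch for the pre-segmented variant above; compared with the first projection layer it only reads the keys below (no hashing or sequence-length options). Values are illustrative.

model_config = {
    "feature_size": 512,
    "add_eos_tag": True,
    "add_bos_tag": False,
    "vocabulary": "",
    "quantize": False,
    "distortion_probability": 0.25,  # only read when mode == base_layers.TRAIN
}
projection = ProjectionLayerPreSegmented(model_config, base_layers.TRAIN)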
Example #5
    def __init__(self, config, mode):
        def _get_params(varname, default_value=None):
            value = config[varname] if varname in config else default_value
            default = "" if varname in config else " (default)"
            logging.info("%s = %s%s", varname, value, default)
            setattr(self, varname, value)

        _get_params("intermediate_size")
        _get_params("max_time_step")
        _get_params("embedding_size")
        _get_params("vocabulary_size")
        _get_params("num_layers")
        _get_params("labels")
        _get_params("regularizer_scale")
        _get_params("num_heads")
        _get_params("model_dimension")
        _get_params("quantize")
        _get_params("activation_dropout_rate", 0.0)
        _get_params("attention_dropout_rate", 0.0)
        self.parameters = base_layers.Parameters(mode, self.quantize,
                                                 self.regularizer_scale)

        super(Model, self).__init__()
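A construction sketch for the model above; the snippet mostly just reads hyperparameters and builds the Parameters object, so this only shows a config that satisfies every _get_params call. The Model class and base_layers are assumed importable and the values are illustrative.

config = {
    "intermediate_size": 256,
    "max_time_step": 64,
    "embedding_size": 64,
    "vocabulary_size": 8000,
    "num_layers": 2,
    "labels": ["class_a", "class_b"],
    "regularizer_scale": 1e-4,
    "num_heads": 4,
    "model_dimension": 64,
    "quantize": True,
}
model = Model(config, base_layers.TRAIN)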
Example #6
    def __init__(self, config, mode):

        super(Model, self).__init__()

        def _get_params(varname, default_value=None):
            value = config[varname] if varname in config else default_value
            default = "" if varname in config else " (default)"
            logging.info("%s = %s%s", varname, value, default)
            setattr(self, varname, value)

        _get_params("intermediate_size")
        _get_params("max_dec_time_step")
        _get_params("max_enc_time_step")
        _get_params("embedding_size")
        _get_params("vocabulary_size")
        _get_params("num_layers")
        _get_params("labels")
        _get_params("regularizer_scale")
        _get_params("num_heads")
        _get_params("model_dimension")
        _get_params("beam_size", 1)
        _get_params("quantize", True)
        _get_params("cached_kv", False)
        _get_params("attention_dropout_rate", 0.0)
        _get_params("activation_dropout_rate", 0.0)
        # If set, a separate dense layer is used to generate the logits instead of
        # re-using the input embedding table.
        _get_params("use_output_layer", False)
        self.parameters = base_layers.Parameters(mode, self.quantize,
                                                 self.regularizer_scale)
        # Activation/Normalization enabled on input bottleneck as there is no
        # temporal information.
        self.input_bottleneck = dense_layers.BaseQDenseVarLen(
            self.model_dimension, rank=3, parameters=self.parameters)
        self.output_bottleneck = dense_layers.BaseQDense(
            self.embedding_size,
            normalize=False,
            activation=None,
            bias=False,
            parameters=self.parameters)

        self.embedding = embedding_layers.EmbeddingFullyConnected(
            shape=[self.vocabulary_size, self.embedding_size],
            initializer=tf.random_uniform_initializer(-math.sqrt(3),
                                                      math.sqrt(3)),
            parameters=self.parameters)
        if self.use_output_layer:
            self.output_layer = dense_layers.BaseQDense(
                self.vocabulary_size,
                activation=None,
                normalize=False,
                bias=False,
                parameters=self.parameters)
        self.positional_embedding = embedding_layers.EmbeddingLayer(
            shape=[self.max_dec_time_step, self.model_dimension],
            initializer=tf.random_uniform_initializer(-math.sqrt(3),
                                                      math.sqrt(3)),
            parameters=self.parameters)
        self.ln = normalization_layers.LayerNormalization(
            parameters=self.parameters)
        self.qact = quantization_layers.ActivationQuantization(
            parameters=self.parameters)
        # Scales the weights for computing logits.
        self.logits_fc_weights_scale_factor = None
        self.logits_fc_bias = self.add_weight(
            "logits_fc_bias",
            shape=[self.vocabulary_size],
            initializer=tf.constant_initializer(0),
            dtype="float32")
        # Optional bias which can be used to mask logits output.
        self.output_bias = None
        self.transformer_uniform_attn_decoder = TransformerUniformAttnDecoderStack(
            parameters=self.parameters,
            num_layers=self.num_layers,
            intermediate_size=self.intermediate_size,
            embedding_size=self.embedding_size,
            max_time_step=self.max_dec_time_step,
            num_heads=self.num_heads,
            model_dimension=self.model_dimension,
            vocabulary_size=self.vocabulary_size,
            beam_size=self.beam_size,
            cached_kv=self.cached_kv,
            attention_dropout_rate=self.attention_dropout_rate,
            activation_dropout_rate=self.activation_dropout_rate)
        # Beam search output.
        self.finished_seq = None
        self.finished_scores = None
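A construction sketch for the uniform-attention transformer decoder model above, with the same caveats: the class and base_layers are assumed importable and the values are illustrative only.

config = {
    "intermediate_size": 256,
    "max_dec_time_step": 32,
    "max_enc_time_step": 64,
    "embedding_size": 64,
    "vocabulary_size": 4000,
    "num_layers": 2,
    "labels": [],
    "regularizer_scale": 1e-4,
    "num_heads": 4,
    "model_dimension": 64,
    "beam_size": 2,
    "cached_kv": True,
    "use_output_layer": True,  # builds the separate logits layer described above
}
model = Model(config, base_layers.TRAIN)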
Example #7
    def __init__(self, config, mode, **kwargs):
        super(Encoder, self).__init__(**kwargs)

        def _get_params(varname, default_value=None):
            value = config.get(varname, default_value)
            default = "" if varname in config else " (default)"
            logging.info("%s = %s%s", varname, value, default)
            setattr(self, varname, value)

        _get_params("feature_size")
        _get_params("bottleneck_size", self.feature_size)
        _get_params("qrnn_state_size")
        _get_params("qrnn_kernel_width", 3)
        _get_params("qrnn_zoneout_probability")
        _get_params("number_qrnn_layers")
        _get_params("labels", [])
        _get_params("regularizer_scale")
        _get_params("quantize")
        _get_params("gbst_max_token_len", 128)
        _get_params("gbst_downsample_rate", 1)
        _get_params("gbst_max_subword_block_width", 4)
        _get_params("gbst_conv_kernel_size", 5)
        _get_params("gbst_block_mixing_mode")
        _get_params("gbst_add_block_pos_embed", False)
        _get_params("attn_pool_output", True)

        self.num_classes = len(config.get("labels", []))

        self.parameters = base_layers.Parameters(
            mode,
            quantize=self.quantize,
            regularizer_scale=self.regularizer_scale)
        # Including 3 additional special token ids (0=padding, 1=EOS, 2=UNK).
        self.vocabulary_size = 259
        self.embedding = embedding_layers.EmbeddingLayer(
            shape=[self.vocabulary_size, self.feature_size],
            parameters=self.parameters)

        self.bottleneck_layer = dense_layers.BaseQDenseVarLen(
            units=self.bottleneck_size, rank=3, parameters=self.parameters)

        self.gbst_layer = misc_layers.GBSTLayerV2(
            feature_size=self.bottleneck_size,
            max_seq_len=self.gbst_max_token_len,
            downsample_rate=self.gbst_downsample_rate,
            max_subword_block_width=self.gbst_max_subword_block_width,
            conv_kernel_size=self.gbst_conv_kernel_size,
            block_mixing_mode=self.gbst_block_mixing_mode,
            add_block_pos_embed=self.gbst_add_block_pos_embed,
            parameters=self.parameters)

        self.qrnn_stack = qrnn_layers.QRNNBidirectionalStack(
            parameters=self.parameters,
            zoneout_probability=self.qrnn_zoneout_probability,
            kwidth=self.qrnn_kernel_width,
            state_size=self.qrnn_state_size,
            num_layers=self.number_qrnn_layers)
        self.attention_pool = misc_layers.AttentionPooling(
            parameters=self.parameters)

        if self.num_classes:
            self.final_fc = dense_layers.BaseQDense(units=self.num_classes,
                                                    rank=2,
                                                    parameters=self.parameters,
                                                    activation=None)
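A construction sketch for the byte-level GBST + QRNN encoder above (vocabulary_size is fixed at 259 inside the class); same assumptions about scope, and the values are illustrative.

config = {
    "feature_size": 128,
    "qrnn_state_size": 128,
    "qrnn_kernel_width": 3,
    "qrnn_zoneout_probability": 0.5,
    "number_qrnn_layers": 2,
    "labels": ["negative", "positive"],
    "regularizer_scale": 1e-4,
    "quantize": True,
    "gbst_downsample_rate": 2,
    # bottleneck_size defaults to feature_size; other gbst_* keys not listed
    # here fall back to the defaults in the _get_params calls above.
}
encoder = Encoder(config, base_layers.TRAIN)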
Example #8
  def __init__(self, config, mode, **kwargs):
    super(Encoder, self).__init__(**kwargs)

    def _get_params(varname, default_value=None):
      value = config[varname] if varname in config else default_value
      default = "" if varname in config else " (default)"
      logging.info("%s = %s%s", varname, value, default)
      setattr(self, varname, value)

    _get_params("labels", [])
    _get_params("regularizer_scale")
    _get_params("quantize")
    _get_params("feature_size")
    _get_params("bottleneck_size")

    self.max_seq_len = config.get("max_seq_len", 128)
    self.gbst_max_token_len = config.get("gbst_max_token_len", 128)
    # Including 3 additional special token ids (0=padding, 1=EOS, 2=UNK).
    self.vocabulary_size = config.get("vocabulary_size", 259)
    self.parameters = base_layers.Parameters(
        mode, quantize=self.quantize, regularizer_scale=self.regularizer_scale)

    self.embedding = embedding_layers.EmbeddingLayer(
        shape=[self.vocabulary_size, self.feature_size],
        parameters=self.parameters)
    self.gbst_downsample_rate = config.get("gbst_downsample_rate", 1)
    self.positional_embedding = embedding_layers.EmbeddingLayer(
        shape=[self.gbst_max_token_len, self.feature_size],
        parameters=self.parameters)
    self.ln = normalization_layers.LayerNormalization(
        parameters=self.parameters)
    self.qact = quantization_layers.ActivationQuantization(
        parameters=self.parameters)

    self.bottleneck_layer = None
    gbst_size = self.feature_size
    if self.bottleneck_size != self.feature_size:
      self.bottleneck_layer = dense_layers.BaseQDenseVarLen(
          self.bottleneck_size,
          rank=3,
          normalize=False,
          activation=None,
          parameters=self.parameters)
      gbst_size = self.bottleneck_size

    self.gbst_max_subword_block_width = config.get(
        "gbst_max_subword_block_width", 5)
    self.gbst_conv_kernel_size = config.get("gbst_conv_kernel_size", 5)
    self.gbst_block_mixing_mode = config.get("gbst_block_mixing_mode", None)
    self.gbst_layer = misc_layers.GBSTLayerV2(
        feature_size=gbst_size,
        max_seq_len=self.gbst_max_token_len,
        downsample_rate=self.gbst_downsample_rate,
        max_subword_block_width=self.gbst_max_subword_block_width,
        conv_kernel_size=self.gbst_conv_kernel_size,
        block_mixing_mode=self.gbst_block_mixing_mode,
        parameters=self.parameters)

    self.pool_windows = config.get("pool_windows", None)
    if self.pool_windows:
      self.transformer_encoder_layer = transformer_encoder.FunnelTransformerModel(
          config, mode)
    else:
      self.transformer_encoder_layer = transformer_encoder.ModelWithEmbeddings(
          config, mode)
    self.attention_pool = misc_layers.AttentionPooling(
        parameters=self.parameters)
    self.num_classes = len(self.labels)
    if self.num_classes:
      self.final_fc = dense_layers.BaseQDense(
          units=self.num_classes,
          rank=2,
          parameters=self.parameters,
          activation=None)
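A construction sketch for the byte-level GBST + transformer encoder above. Note that the same config dict is also passed to the transformer_encoder model, so a real config would need whatever keys that model reads as well; the keys and values below only cover what this snippet reads and are illustrative.

config = {
    "labels": ["negative", "positive"],
    "regularizer_scale": 1e-4,
    "quantize": True,
    "feature_size": 128,
    "bottleneck_size": 64,   # differs from feature_size, so the bottleneck layer is built
    "max_seq_len": 128,
    "gbst_max_token_len": 128,
    "gbst_downsample_rate": 2,
    "pool_windows": None,    # None selects ModelWithEmbeddings over the Funnel variant
}
encoder = Encoder(config, base_layers.TRAIN)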