Example #1
    def __init__(self, hparams=None):
        ClassifierBase.__init__(self, hparams)

        with tf.variable_scope(self.variable_scope):
            encoder_hparams = utils.dict_fetch(
                hparams, Conv1DEncoder.default_hparams())
            self._encoder = Conv1DEncoder(hparams=encoder_hparams)

            # Add an additional dense layer if needed
            self._num_classes = self._hparams.num_classes
            if self._num_classes > 0:
                if self._hparams.num_dense_layers <= 0:
                    # Flatten the conv output so it can feed the Dense
                    # logit layer when no dense layers exist yet
                    self._encoder.append_layer({"type": "Flatten"})

                logit_kwargs = self._hparams.logit_layer_kwargs
                if logit_kwargs is None:
                    logit_kwargs = {}
                elif not isinstance(logit_kwargs, HParams):
                    raise ValueError(
                        "hparams['logit_layer_kwargs'] must be a dict.")
                else:
                    logit_kwargs = logit_kwargs.todict()
                logit_kwargs.update({"units": self._num_classes})
                if 'name' not in logit_kwargs:
                    logit_kwargs['name'] = "logit_layer"

                self._encoder.append_layer(
                    {"type": "Dense", "kwargs": logit_kwargs})
Example #2
    def __init__(self,
                 pretrained_model_name=None,
                 cache_dir=None,
                 hparams=None):
        super(XLNetClassifier, self).__init__(hparams=hparams)

        with tf.variable_scope(self.variable_scope):
            tf.get_variable_scope().set_initializer(
                get_initializer(self._hparams.initializer))
            # Creates the underlying encoder
            encoder_hparams = dict_fetch(hparams,
                                         XLNetEncoder.default_hparams())
            if encoder_hparams is not None:
                encoder_hparams['name'] = "encoder"
            self._encoder = XLNetEncoder(
                pretrained_model_name=pretrained_model_name,
                cache_dir=cache_dir,
                hparams=encoder_hparams)
            if self._hparams.use_projection:
                self.projection = get_layer(
                    hparams={
                        "type": "Dense",
                        "kwargs": {
                            "units": self._encoder.output_size
                        }
                    })

            # Creates a dropout layer
            drop_kwargs = {"rate": self._hparams.dropout}
            layer_hparams = {"type": "Dropout", "kwargs": drop_kwargs}
            self._dropout_layer = get_layer(hparams=layer_hparams)

            # Creates an additional classification layer if needed
            self._num_classes = self._hparams.num_classes
            if self._num_classes <= 0:
                self._logit_layer = None
            else:
                logit_kwargs = self._hparams.logit_layer_kwargs
                if logit_kwargs is None:
                    logit_kwargs = {}
                elif not isinstance(logit_kwargs, HParams):
                    raise ValueError(
                        "hparams['logit_layer_kwargs'] must be a dict.")
                else:
                    logit_kwargs = logit_kwargs.todict()
                logit_kwargs.update({"units": self._num_classes})
                if 'name' not in logit_kwargs:
                    logit_kwargs['name'] = "logit_layer"

                layer_hparams = {"type": "Dense", "kwargs": logit_kwargs}
                self._logit_layer = get_layer(hparams=layer_hparams)
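A hedged usage sketch for the XLNetClassifier constructed above, with the same texar-tf import assumption as the previous sketch; the checkpoint name and placeholder shapes are illustrative.

    import tensorflow as tf
    from texar.tf.modules import XLNetClassifier

    # `pretrained_model_name` selects the checkpoint for the underlying
    # XLNetEncoder; `num_classes` adds the Dense logit layer from __init__.
    clf = XLNetClassifier(
        pretrained_model_name="xlnet-base-cased",  # illustrative name
        hparams={"num_classes": 2})

    token_ids = tf.placeholder(tf.int32, shape=[None, None])
    logits, preds = clf(token_ids)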
Example #3
    def __init__(self,
                 pretrained_model_name=None,
                 cache_dir=None,
                 hparams=None):

        super().__init__(hparams=hparams)

        with tf.variable_scope(self.variable_scope):
            encoder_hparams = dict_fetch(hparams,
                                         GPT2Encoder.default_hparams())
            if encoder_hparams is not None:
                encoder_hparams['name'] = None

            self._encoder = GPT2Encoder(
                pretrained_model_name=pretrained_model_name,
                cache_dir=cache_dir,
                hparams=encoder_hparams)

            # Creates a dropout layer
            drop_kwargs = {"rate": self._hparams.dropout}
            layer_hparams = {"type": "Dropout", "kwargs": drop_kwargs}
            self._dropout_layer = get_layer(hparams=layer_hparams)

            # Creates an additional classification layer if needed
            self._num_classes = self._hparams.num_classes
            if self._num_classes <= 0:
                self._logit_layer = None
            else:
                logit_kwargs = self._hparams.logit_layer_kwargs
                if logit_kwargs is None:
                    logit_kwargs = {}
                elif not isinstance(logit_kwargs, HParams):
                    raise ValueError(
                        "hparams['logit_layer_kwargs'] must be a dict.")
                else:
                    logit_kwargs = logit_kwargs.todict()
                logit_kwargs.update({"units": self._num_classes})
                if 'name' not in logit_kwargs:
                    logit_kwargs['name'] = "logit_layer"

                layer_hparams = {"type": "Dense", "kwargs": logit_kwargs}
                self._logit_layer = get_layer(hparams=layer_hparams)
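The GPT2Classifier above follows the same pattern; a short sketch under the same import assumptions, where sequence_length is the per-example count of valid tokens.

    import tensorflow as tf
    from texar.tf.modules import GPT2Classifier

    clf = GPT2Classifier(hparams={"num_classes": 2})

    token_ids = tf.placeholder(tf.int32, shape=[None, None])
    seq_len = tf.placeholder(tf.int32, shape=[None])
    logits, preds = clf(inputs=token_ids, sequence_length=seq_len)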
Example #4
    def __init__(self,
                 cell=None,
                 cell_dropout_mode=None,
                 output_layer=None,
                 hparams=None):
        ClassifierBase.__init__(self, hparams)

        with tf.variable_scope(self.variable_scope):
            # Creates the underlying encoder
            encoder_hparams = utils.dict_fetch(
                hparams, UnidirectionalRNNEncoder.default_hparams())
            if encoder_hparams is not None:
                encoder_hparams['name'] = None
            self._encoder = UnidirectionalRNNEncoder(
                cell=cell,
                cell_dropout_mode=cell_dropout_mode,
                output_layer=output_layer,
                hparams=encoder_hparams)

            # Creates an additional classification layer if needed
            self._num_classes = self._hparams.num_classes
            if self._num_classes <= 0:
                self._logit_layer = None
            else:
                logit_kwargs = self._hparams.logit_layer_kwargs
                if logit_kwargs is None:
                    logit_kwargs = {}
                elif not isinstance(logit_kwargs, HParams):
                    raise ValueError(
                        "hparams['logit_layer_kwargs'] must be a dict.")
                else:
                    logit_kwargs = logit_kwargs.todict()
                logit_kwargs.update({"units": self._num_classes})
                if 'name' not in logit_kwargs:
                    logit_kwargs['name'] = "logit_layer"

                layer_hparams = {"type": "Dense", "kwargs": logit_kwargs}
                self._logit_layer = layers.get_layer(hparams=layer_hparams)
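As with Example #1, the class name is not shown; in texar-tf this __init__ matches UnidirectionalRNNClassifier, which the sketch below assumes. Inputs here are pre-embedded sequences with illustrative shapes.

    import tensorflow as tf
    from texar.tf.modules import UnidirectionalRNNClassifier

    clf = UnidirectionalRNNClassifier(hparams={"num_classes": 10})

    # Pre-embedded inputs: [batch_size, max_time, emb_dim].
    embedded = tf.random_uniform([32, 50, 100])
    lengths = tf.fill([32], 50)
    logits, pred = clf(inputs=embedded, sequence_length=lengths)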
Example #5
    def __init__(self,
                 pretrained_model_name=None,
                 cache_dir=None,
                 hparams=None):
        super(XLNetEncoder, self).__init__(hparams=hparams)

        self.load_pretrained_config(pretrained_model_name, cache_dir)

        num_layers = self._hparams.num_layers
        use_segments = self._hparams.use_segments
        untie_r = self._hparams.untie_r

        with tf.variable_scope(self.variable_scope):

            if untie_r:
                self.r_w_bias = tf.get_variable(
                    'r_w_bias',
                    [num_layers, self._hparams.num_heads,
                     self._hparams.head_dim],
                    dtype=tf.float32)
                self.r_r_bias = tf.get_variable(
                    'r_r_bias',
                    [num_layers, self._hparams.num_heads,
                     self._hparams.head_dim],
                    dtype=tf.float32)
            else:
                self.r_w_bias = tf.get_variable(
                    'r_w_bias',
                    [self._hparams.num_heads, self._hparams.head_dim],
                    dtype=tf.float32)
                self.r_r_bias = tf.get_variable(
                    'r_r_bias',
                    [self._hparams.num_heads, self._hparams.head_dim],
                    dtype=tf.float32)

            if use_segments:
                self.segment_embed = tf.get_variable(
                    'seg_embed',
                    [num_layers, 2, self._hparams.num_heads,
                     self._hparams.head_dim],
                    dtype=tf.float32)
                if untie_r:
                    self.r_s_bias = tf.get_variable(
                        'r_s_bias',
                        [num_layers, self._hparams.num_heads,
                         self._hparams.head_dim],
                        dtype=tf.float32)
                else:
                    self.r_s_bias = tf.get_variable(
                        'r_s_bias',
                        [self._hparams.num_heads, self._hparams.head_dim],
                        dtype=tf.float32)
            else:
                self.segment_embed = None
                self.r_s_bias = None

            # Word embedding
            self.word_embedder = WordEmbedder(
                vocab_size=self._hparams.vocab_size,
                hparams={"dim": self._hparams.hidden_dim})

            # Relative position embedding
            self.pos_embed = RelativePositionalEncoding(
                hparams={
                    "dim": self._hparams.hidden_dim,
                    "max_seq_len": self._hparams.max_seq_len
                })

            self.attn_layers = []
            self.ff_layers = []
            rel_attn_hparams = dict_fetch(
                self._hparams, RelativeMutiheadAttention.default_hparams())
            rel_attn_hparams["name"] = "rel_attn"

            ff_hparams = dict_fetch(self._hparams,
                                    PositionWiseFF.default_hparams())
            ff_hparams["name"] = "ff"

            for i in range(num_layers):
                with tf.variable_scope("layer_{}".format(i)):
                    # With untied biases, each layer indexes its own slice
                    # of the per-layer bias variables created above
                    if untie_r:
                        if use_segments:
                            self.attn_layers.append(
                                RelativeMutiheadAttention(
                                    self.r_r_bias[i],
                                    self.r_w_bias[i],
                                    self.r_s_bias[i],
                                    self.segment_embed[i],
                                    hparams=rel_attn_hparams))
                        else:
                            self.attn_layers.append(
                                RelativeMutiheadAttention(
                                    self.r_r_bias[i],
                                    self.r_w_bias[i],
                                    hparams=rel_attn_hparams))
                    else:
                        if use_segments:
                            self.attn_layers.append(
                                RelativeMutiheadAttention(
                                    self.r_r_bias,
                                    self.r_w_bias,
                                    self.r_s_bias,
                                    self.segment_embed[i],
                                    hparams=rel_attn_hparams))
                        else:
                            self.attn_layers.append(
                                RelativeMutiheadAttention(
                                    self.r_r_bias,
                                    self.r_w_bias,
                                    hparams=rel_attn_hparams))
                    self.ff_layers.append(PositionWiseFF(hparams=ff_hparams))

            dropout_hparams = {
                "type": "Dropout",
                "kwargs": {
                    "rate": self._hparams.dropout
                }
            }
            self.dropout = get_layer(hparams=dropout_hparams)

            self.mask_embed = tf.get_variable(
                'mask_emb', [1, 1, self._hparams.hidden_dim],
                dtype=tf.float32)
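A usage sketch for the encoder itself, under the same texar-tf assumptions; the second return value is the updated memory for XLNet's segment-level recurrence (it may be empty unless a cache length is configured), and the expected axis order of token_ids follows the library docs rather than being asserted here.

    import tensorflow as tf
    from texar.tf.modules import XLNetEncoder

    enc = XLNetEncoder(pretrained_model_name="xlnet-base-cased")  # illustrative name

    token_ids = tf.placeholder(tf.int32, shape=[None, None])  # axis order per library docs
    output, new_memory = enc(token_ids)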